2007-06-07 16:06:19 +04:00
/*
ctdb ltdb code - server side
Copyright ( C ) Andrew Tridgell 2007
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
2007-07-10 09:29:31 +04:00
the Free Software Foundation ; either version 3 of the License , or
2007-06-07 16:06:19 +04:00
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
2007-07-10 09:29:31 +04:00
along with this program; if not, see <http://www.gnu.org/licenses/>.
2007-06-07 16:06:19 +04:00
*/
# include "includes.h"
# include "lib/tdb/include/tdb.h"
# include "system/network.h"
# include "system/filesys.h"
2007-09-21 06:24:02 +04:00
# include "system/dir.h"
2009-12-07 15:28:11 +03:00
# include "system/time.h"
2007-06-07 16:06:19 +04:00
# include "../include/ctdb_private.h"
2010-12-17 03:38:09 +03:00
# include "../common/rb_tree.h"
2007-06-07 16:06:19 +04:00
# include "db_wrap.h"
# include "lib/util/dlinklist.h"
2009-10-28 14:54:29 +03:00
# include <ctype.h>
2007-06-07 16:06:19 +04:00
2009-11-29 14:39:37 +03:00
# define PERSISTENT_HEALTH_TDB "persistent_health.tdb"
2010-12-30 19:44:51 +03:00
/**
 * Write a record to a normal database.
 *
 * This is the server variant of the ctdb_ltdb_store function.
 * It contains the logic that decides whether a record should be
 * stored or deleted. Where appropriate it also schedules the record
 * for deletion in the local ctdb daemon, or removes it from the
 * delete queue again.
 *
 * ctdb_db: database the record belongs to
 * key:     record key
 * header:  ltdb header to store in front of the data; the
 *          VACUUM_MIGRATED and AUTOMATIC flags are cleared in place
 * data:    record payload (may be empty)
 *
 * Returns the result of tdb_store()/tdb_delete() (0 on success).
 * NOTE(review): CTDB_NO_MEMORY is expected to return early when the
 * record buffer cannot be allocated - confirm against its definition.
 */
static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
				  TDB_DATA key,
				  struct ctdb_ltdb_header *header,
				  TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;
	bool seqnum_suppressed = false;
	bool keep = false;
	bool schedule_for_deletion = false;
	bool remove_from_delete_queue = false;
	uint32_t lmaster;

	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;

		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/*
		 * Only look at the stored header if the record is large
		 * enough to contain a complete header (size of the
		 * struct, not of the pointer).
		 */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(DEBUG_CRIT, (" RSN regression! %llu %llu \n ",
					   (unsigned long long)h2->rsn,
					   (unsigned long long)header->rsn));
		}
		if (rec.dptr) free(rec.dptr);
	}

	if (ctdb->vnn_map == NULL) {
		/*
		 * Called from a client: always store the record.
		 * Also don't call ctdb_lmaster since it uses the vnn_map!
		 */
		keep = true;
		goto store;
	}

	lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);

	/*
	 * If we migrate an empty record off to another node
	 * and the record has not been migrated with data,
	 * delete the record instead of storing the empty record.
	 */
	if (data.dsize != 0) {
		keep = true;
	} else if (header->flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE)) {
		keep = true;
	} else if (ctdb_db->persistent) {
		keep = true;
	} else if (header->flags & CTDB_REC_FLAG_AUTOMATIC) {
		/*
		 * The record is not created by the client but
		 * automatically by the ctdb_ltdb_fetch logic that
		 * creates a record with an initial header in the
		 * ltdb before trying to migrate the record from
		 * the current lmaster. Keep it instead of trying
		 * to delete the non-existing record...
		 */
		keep = true;
		schedule_for_deletion = true;
	} else if (header->flags & CTDB_REC_FLAG_MIGRATED_WITH_DATA) {
		keep = true;
	} else if (ctdb_db->ctdb->pnn == lmaster) {
		/*
		 * If we are lmaster, then we usually keep the record.
		 * But if we retrieve the dmaster role by a VACUUM_MIGRATE
		 * and the record is empty and has never been migrated
		 * with data, then we should delete it instead of storing it.
		 * This is part of the vacuuming process.
		 *
		 * The reason that we usually need to store even empty records
		 * on the lmaster is that a client operating directly on the
		 * lmaster (== dmaster) expects the local copy of the record to
		 * exist after successful ctdb migrate call. If the record does
		 * not exist, the client goes into a migrate loop and eventually
		 * fails. So storing the empty record makes sure that we do not
		 * need to change the client code.
		 */
		if (!(header->flags & CTDB_REC_FLAG_VACUUM_MIGRATED)) {
			keep = true;
		} else if (ctdb_db->ctdb->pnn != header->dmaster) {
			keep = true;
		}
	} else if (ctdb_db->ctdb->pnn == header->dmaster) {
		keep = true;
	}

	if (keep) {
		if ((data.dsize == 0) &&
		    !ctdb_db->persistent &&
		    (ctdb_db->ctdb->pnn == header->dmaster) &&
		    !(header->flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE)))
		{
			schedule_for_deletion = true;
		}
		remove_from_delete_queue = !schedule_for_deletion;
	}

store:
	/*
	 * The VACUUM_MIGRATED flag is only set temporarily for
	 * the above logic when the record was retrieved by a
	 * VACUUM_MIGRATE call and should not be stored in the
	 * database.
	 *
	 * The VACUUM_MIGRATE call is triggered by a vacuum fetch,
	 * and there are two cases in which the corresponding record
	 * is stored in the local database:
	 * 1. The record has been migrated with data in the past
	 *    (the MIGRATED_WITH_DATA record flag is set).
	 * 2. The record has been filled with data again since it
	 *    had been submitted in the VACUUM_FETCH message to the
	 *    lmaster.
	 * For such records it is important to not store the
	 * VACUUM_MIGRATED flag in the database.
	 */
	header->flags &= ~CTDB_REC_FLAG_VACUUM_MIGRATED;

	/*
	 * Similarly, clear the AUTOMATIC flag which should not enter
	 * the local database copy since this would require client
	 * modifications to clear the flag when the client stores
	 * the record.
	 */
	header->flags &= ~CTDB_REC_FLAG_AUTOMATIC;

	/* build the on-disk record: header immediately followed by data */
	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);

	/* Databases with seqnum updates enabled only get their seqnum
	   changes when/if we modify the data */
	if (ctdb_db->seqnum_update != NULL) {
		TDB_DATA old;

		old = tdb_fetch(ctdb_db->ltdb->tdb, key);

		/* same size and same payload: suppress the seqnum bump */
		if ((old.dsize == rec.dsize)
		    && !memcmp(old.dptr + sizeof(struct ctdb_ltdb_header),
			       rec.dptr + sizeof(struct ctdb_ltdb_header),
			       rec.dsize - sizeof(struct ctdb_ltdb_header))) {
			tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
			seqnum_suppressed = true;
		}
		if (old.dptr) free(old.dptr);
	}

	DEBUG(DEBUG_DEBUG, (__location__ " db[%s]: %s record: hash[0x%08x] \n ",
			    ctdb_db->db_name,
			    keep ? " storing " : " deleting ",
			    ctdb_hash(&key)));

	if (keep) {
		ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	} else {
		ret = tdb_delete(ctdb_db->ltdb->tdb, key);
	}

	if (ret != 0) {
		int lvl = DEBUG_ERR;

		/* deleting a record that does not exist is not worth an error */
		if (keep == false &&
		    tdb_error(ctdb_db->ltdb->tdb) == TDB_ERR_NOEXIST)
		{
			lvl = DEBUG_DEBUG;
		}

		DEBUG(lvl, (__location__ " db[%s]: Failed to %s record: "
			    " %d - %s \n ",
			    ctdb_db->db_name,
			    keep ? " store " : " delete ", ret,
			    tdb_errorstr(ctdb_db->ltdb->tdb)));

		/* the record state is unknown: leave the delete queue alone */
		schedule_for_deletion = false;
		remove_from_delete_queue = false;
	}

	if (seqnum_suppressed) {
		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
	}

	talloc_free(rec.dptr);

	if (schedule_for_deletion) {
		int ret2;

		ret2 = ctdb_local_schedule_for_deletion(ctdb_db, header, key);
		if (ret2 != 0) {
			DEBUG(DEBUG_ERR, (__location__ " ctdb_local_schedule_for_deletion failed. \n "));
		}
	}

	if (remove_from_delete_queue) {
		ctdb_local_remove_from_delete_queue(ctdb_db, header, key);
	}

	return ret;
}
2007-06-07 16:06:19 +04:00
/*
  state kept for a ctdb request that is waiting for a chainlock
  (filled in by ctdb_ltdb_lock_requeue, consumed by lock_fetch_callback)
 */
struct lock_fetch_state {
	/* daemon context the request belongs to */
	struct ctdb_context *ctdb;

	/* function (and its first argument) used to re-submit the packet */
	void (*recv_pkt)(void *, struct ctdb_req_header *);
	void *recv_context;

	/* the deferred request packet */
	struct ctdb_req_header *hdr;

	/* vnn_map generation recorded when the request was deferred */
	uint32_t generation;

	/* when true, re-submit even if the generation has changed */
	bool ignore_generation;
};
/*
called when we should retry the operation
*/
static void lock_fetch_callback ( void * p )
{
struct lock_fetch_state * state = talloc_get_type ( p , struct lock_fetch_state ) ;
if ( ! state - > ignore_generation & &
state - > generation ! = state - > ctdb - > vnn_map - > generation ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_NOTICE , ( " Discarding previous generation lockwait packet \n " ) ) ;
2007-06-07 16:06:19 +04:00
talloc_free ( state - > hdr ) ;
return ;
}
state - > recv_pkt ( state - > recv_context , state - > hdr ) ;
2008-02-04 09:44:24 +03:00
DEBUG ( DEBUG_INFO , ( __location__ " PACKET REQUEUED \n " ) ) ;
2007-06-07 16:06:19 +04:00
}
/*
  do a non-blocking ltdb_lock, deferring this ctdb request until we
  have the chainlock

  It does the following:

   1) tries to get the chainlock. If it succeeds, then it returns 0

   2) if it fails to get a chainlock immediately then it sets up a
      non-blocking chainlock via ctdb_lockwait, and when it gets the
      chainlock it re-submits this ctdb request to the main packet
      receive function

  This effectively queues all ctdb requests that cannot be
  immediately satisfied until it can get the lock. This means that
  the main ctdb daemon will not block waiting for a chainlock held by
  a client

  There are 3 possible return values:

       0:    means that it got the lock immediately.
      -1:    means that it failed to get the lock, and won't retry
             (hard lock failure, out of memory, or ctdb_lockwait failed)
      -2:    means that it failed to get the lock immediately, but will retry
 */
int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
			   TDB_DATA key, struct ctdb_req_header *hdr,
			   void (*recv_pkt)(void *, struct ctdb_req_header *),
			   void *recv_context, bool ignore_generation)
{
	int ret;
	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
	struct lockwait_handle *h;
	struct lock_fetch_state *state;

	ret = tdb_chainlock_nonblock(tdb, key);

	if (ret != 0 &&
	    !(errno == EACCES || errno == EAGAIN || errno == EDEADLK)) {
		/* a hard failure - don't try again */
		return -1;
	}

	/*
	 * when torturing, ensure we test the contended path.
	 * Only give the lock back if we actually obtained it.
	 */
	if (ret == 0 &&
	    (ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
	    random() % 5 == 0) {
		ret = -1;
		tdb_chainunlock(tdb, key);
	}

	/* first the non-contended path */
	if (ret == 0) {
		return 0;
	}

	state = talloc(hdr, struct lock_fetch_state);
	if (state == NULL) {
		/* we do not hold the chainlock here, so just give up */
		DEBUG(DEBUG_ERR, (__location__ " Out of memory allocating lock_fetch_state \n "));
		return -1;
	}
	state->ctdb = ctdb_db->ctdb;
	state->hdr = hdr;
	state->recv_pkt = recv_pkt;
	state->recv_context = recv_context;
	state->generation = ctdb_db->ctdb->vnn_map->generation;
	state->ignore_generation = ignore_generation;

	/* now the contended path */
	h = ctdb_lockwait(ctdb_db, key, lock_fetch_callback, state);
	if (h == NULL) {
		return -1;
	}

	/* we need to move the packet off the temporary context in ctdb_input_pkt(),
	   so it won't be freed yet */
	talloc_steal(state, hdr);
	talloc_steal(state, h);

	/* now tell the caller than we will retry asynchronously */
	return -2;
}
/*
  a variant of ctdb_ltdb_lock_requeue that also fetches the record

  Returns the ctdb_ltdb_lock_requeue result when the lock was not taken
  immediately; otherwise returns the ctdb_ltdb_fetch result. If the
  fetch fails the chainlock is released again before returning.
 */
int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db,
				 TDB_DATA key, struct ctdb_ltdb_header *header,
				 struct ctdb_req_header *hdr, TDB_DATA *data,
				 void (*recv_pkt)(void *, struct ctdb_req_header *),
				 void *recv_context, bool ignore_generation)
{
	int lock_ret;
	int fetch_ret;
	int unlock_ret;

	lock_ret = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr, recv_pkt,
					  recv_context, ignore_generation);
	if (lock_ret != 0) {
		/* not locked (failed or deferred): nothing to fetch */
		return lock_ret;
	}

	fetch_ret = ctdb_ltdb_fetch(ctdb_db, key, header, hdr, data);
	if (fetch_ret == 0) {
		return 0;
	}

	/* the fetch failed: do not keep holding the chainlock */
	unlock_ret = ctdb_ltdb_unlock(ctdb_db, key);
	if (unlock_ret != 0) {
		DEBUG(DEBUG_ERR, (__location__ " ctdb_ltdb_unlock() failed with error %d \n ", unlock_ret));
	}

	return fetch_ret;
}
/*
paraoid check to see if the db is empty
*/
static void ctdb_check_db_empty ( struct ctdb_db_context * ctdb_db )
{
struct tdb_context * tdb = ctdb_db - > ltdb - > tdb ;
int count = tdb_traverse_read ( tdb , NULL , NULL ) ;
if ( count ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ALERT , ( __location__ " tdb '%s' not empty on attach! aborting \n " ,
2007-06-07 16:06:19 +04:00
ctdb_db - > db_path ) ) ;
ctdb_fatal ( ctdb_db - > ctdb , " database not empty on attach " ) ;
}
}
2009-12-07 15:28:11 +03:00
int ctdb_load_persistent_health ( struct ctdb_context * ctdb ,
struct ctdb_db_context * ctdb_db )
{
struct tdb_context * tdb = ctdb - > db_persistent_health - > tdb ;
char * old ;
char * reason = NULL ;
TDB_DATA key ;
TDB_DATA val ;
key . dptr = discard_const_p ( uint8_t , ctdb_db - > db_name ) ;
key . dsize = strlen ( ctdb_db - > db_name ) ;
old = ctdb_db - > unhealthy_reason ;
ctdb_db - > unhealthy_reason = NULL ;
val = tdb_fetch ( tdb , key ) ;
if ( val . dsize > 0 ) {
reason = talloc_strndup ( ctdb_db ,
( const char * ) val . dptr ,
val . dsize ) ;
if ( reason = = NULL ) {
DEBUG ( DEBUG_ALERT , ( __location__ " talloc_strndup(%d) failed \n " ,
( int ) val . dsize ) ) ;
ctdb_db - > unhealthy_reason = old ;
free ( val . dptr ) ;
return - 1 ;
}
}
if ( val . dptr ) {
free ( val . dptr ) ;
}
talloc_free ( old ) ;
ctdb_db - > unhealthy_reason = reason ;
return 0 ;
}
int ctdb_update_persistent_health ( struct ctdb_context * ctdb ,
struct ctdb_db_context * ctdb_db ,
const char * given_reason , /* NULL means healthy */
int num_healthy_nodes )
{
struct tdb_context * tdb = ctdb - > db_persistent_health - > tdb ;
int ret ;
TDB_DATA key ;
TDB_DATA val ;
char * new_reason = NULL ;
char * old_reason = NULL ;
ret = tdb_transaction_start ( tdb ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__ " tdb_transaction_start('%s') failed: %d - %s \n " ,
tdb_name ( tdb ) , ret , tdb_errorstr ( tdb ) ) ) ;
return - 1 ;
}
ret = ctdb_load_persistent_health ( ctdb , ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__ " ctdb_load_persistent_health('%s') failed: %d \n " ,
ctdb_db - > db_name , ret ) ) ;
return - 1 ;
}
old_reason = ctdb_db - > unhealthy_reason ;
key . dptr = discard_const_p ( uint8_t , ctdb_db - > db_name ) ;
key . dsize = strlen ( ctdb_db - > db_name ) ;
if ( given_reason ) {
new_reason = talloc_strdup ( ctdb_db , given_reason ) ;
if ( new_reason = = NULL ) {
DEBUG ( DEBUG_ALERT , ( __location__ " talloc_strdup(%s) failed \n " ,
given_reason ) ) ;
return - 1 ;
}
} else if ( old_reason & & num_healthy_nodes = = 0 ) {
/*
* If the reason indicates ok , but there where no healthy nodes
* available , that it means , we have not recovered valid content
* of the db . So if there ' s an old reason , prefix it with
* " NO-HEALTHY-NODES - "
*/
const char * prefix ;
# define _TMP_PREFIX "NO-HEALTHY-NODES - "
ret = strncmp ( _TMP_PREFIX , old_reason , strlen ( _TMP_PREFIX ) ) ;
if ( ret ! = 0 ) {
prefix = _TMP_PREFIX ;
} else {
prefix = " " ;
}
new_reason = talloc_asprintf ( ctdb_db , " %s%s " ,
prefix , old_reason ) ;
if ( new_reason = = NULL ) {
DEBUG ( DEBUG_ALERT , ( __location__ " talloc_asprintf(%s%s) failed \n " ,
prefix , old_reason ) ) ;
return - 1 ;
}
# undef _TMP_PREFIX
}
if ( new_reason ) {
val . dptr = discard_const_p ( uint8_t , new_reason ) ;
val . dsize = strlen ( new_reason ) ;
ret = tdb_store ( tdb , key , val , TDB_REPLACE ) ;
if ( ret ! = 0 ) {
tdb_transaction_cancel ( tdb ) ;
DEBUG ( DEBUG_ALERT , ( __location__ " tdb_store('%s', %s, %s) failed: %d - %s \n " ,
tdb_name ( tdb ) , ctdb_db - > db_name , new_reason ,
ret , tdb_errorstr ( tdb ) ) ) ;
talloc_free ( new_reason ) ;
return - 1 ;
}
DEBUG ( DEBUG_ALERT , ( " Updated db health for db(%s) to: %s \n " ,
ctdb_db - > db_name , new_reason ) ) ;
} else if ( old_reason ) {
ret = tdb_delete ( tdb , key ) ;
if ( ret ! = 0 ) {
tdb_transaction_cancel ( tdb ) ;
DEBUG ( DEBUG_ALERT , ( __location__ " tdb_delete('%s', %s) failed: %d - %s \n " ,
tdb_name ( tdb ) , ctdb_db - > db_name ,
ret , tdb_errorstr ( tdb ) ) ) ;
talloc_free ( new_reason ) ;
return - 1 ;
}
DEBUG ( DEBUG_NOTICE , ( " Updated db health for db(%s): OK \n " ,
ctdb_db - > db_name ) ) ;
}
ret = tdb_transaction_commit ( tdb ) ;
if ( ret ! = TDB_SUCCESS ) {
DEBUG ( DEBUG_ALERT , ( __location__ " tdb_transaction_commit('%s') failed: %d - %s \n " ,
tdb_name ( tdb ) , ret , tdb_errorstr ( tdb ) ) ) ;
talloc_free ( new_reason ) ;
return - 1 ;
}
talloc_free ( old_reason ) ;
ctdb_db - > unhealthy_reason = new_reason ;
return 0 ;
}
static int ctdb_backup_corrupted_tdb ( struct ctdb_context * ctdb ,
struct ctdb_db_context * ctdb_db )
{
time_t now = time ( NULL ) ;
char * new_path ;
char * new_reason ;
int ret ;
struct tm * tm ;
tm = gmtime ( & now ) ;
/* formatted like: foo.tdb.0.corrupted.20091204160825.0Z */
new_path = talloc_asprintf ( ctdb_db , " %s.corrupted. "
" %04u%02u%02u%02u%02u%02u.0Z " ,
ctdb_db - > db_path ,
tm - > tm_year + 1900 , tm - > tm_mon + 1 ,
tm - > tm_mday , tm - > tm_hour , tm - > tm_min ,
tm - > tm_sec ) ;
if ( new_path = = NULL ) {
DEBUG ( DEBUG_CRIT , ( __location__ " talloc_asprintf() failed \n " ) ) ;
return - 1 ;
}
new_reason = talloc_asprintf ( ctdb_db ,
" ERROR - Backup of corrupted TDB in '%s' " ,
new_path ) ;
if ( new_reason = = NULL ) {
DEBUG ( DEBUG_CRIT , ( __location__ " talloc_asprintf() failed \n " ) ) ;
return - 1 ;
}
ret = ctdb_update_persistent_health ( ctdb , ctdb_db , new_reason , 0 ) ;
talloc_free ( new_reason ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" : ctdb_backup_corrupted_tdb(%s) not implemented yet \n " ,
ctdb_db - > db_path ) ) ;
return - 1 ;
}
ret = rename ( ctdb_db - > db_path , new_path ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" : ctdb_backup_corrupted_tdb(%s) rename to %s failed: %d - %s \n " ,
ctdb_db - > db_path , new_path ,
errno , strerror ( errno ) ) ) ;
talloc_free ( new_path ) ;
return - 1 ;
}
DEBUG ( DEBUG_CRIT , ( __location__
" : ctdb_backup_corrupted_tdb(%s) renamed to %s \n " ,
ctdb_db - > db_path , new_path ) ) ;
talloc_free ( new_path ) ;
return 0 ;
}
int ctdb_recheck_persistent_health ( struct ctdb_context * ctdb )
{
struct ctdb_db_context * ctdb_db ;
int ret ;
int ok = 0 ;
int fail = 0 ;
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
if ( ! ctdb_db - > persistent ) {
continue ;
}
ret = ctdb_load_persistent_health ( ctdb , ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__
" load persistent health for '%s' failed \n " ,
ctdb_db - > db_path ) ) ;
return - 1 ;
}
if ( ctdb_db - > unhealthy_reason = = NULL ) {
ok + + ;
DEBUG ( DEBUG_INFO , ( __location__
" persistent db '%s' healthy \n " ,
ctdb_db - > db_path ) ) ;
continue ;
}
fail + + ;
DEBUG ( DEBUG_ALERT , ( __location__
" persistent db '%s' unhealthy: %s \n " ,
ctdb_db - > db_path ,
ctdb_db - > unhealthy_reason ) ) ;
}
DEBUG ( ( fail ! = 0 ) ? DEBUG_ALERT : DEBUG_NOTICE ,
( " ctdb_recheck_presistent_health: OK[%d] FAIL[%d] \n " ,
ok , fail ) ) ;
if ( fail ! = 0 ) {
return - 1 ;
}
return 0 ;
}
2007-09-21 06:24:02 +04:00
2009-12-02 14:48:22 +03:00
/*
  control handler: mark a database as healthy

  indata carries the 32-bit database id.
  NOTE(review): the size of indata is not validated here - presumably
  the control dispatcher does that; confirm against the caller.

  If the database was unhealthy before and startup has not completed,
  recovery mode is set to active.

  Returns 0 on success, -1 for an unknown db or a failed update.
 */
int32_t ctdb_control_db_set_healthy(struct ctdb_context *ctdb, TDB_DATA indata)
{
	uint32_t db_id = *(uint32_t *)indata.dptr;
	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
	bool was_unhealthy;

	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%x \n ", db_id));
		return -1;
	}

	/* remember the state before the update clears the reason */
	was_unhealthy = (ctdb_db->unhealthy_reason != NULL);

	if (ctdb_update_persistent_health(ctdb, ctdb_db, NULL, 1) != 0) {
		DEBUG(DEBUG_ERR, (__location__
				  " ctdb_update_persistent_health(%s) failed \n ",
				  ctdb_db->db_name));
		return -1;
	}

	if (was_unhealthy && !ctdb->done_startup) {
		DEBUG(DEBUG_ERR, (__location__ " db %s become healthy - force recovery for startup \n ",
				  ctdb_db->db_name));
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	return 0;
}
/*
  control handler: report the health of a database

  indata carries the 32-bit database id. On success *outdata is
  tdb_null for a healthy database, otherwise it points at the database's
  unhealthy_reason string (including the terminating NUL).

  Returns 0 on success, -1 for an unknown db or a failed reload.
 */
int32_t ctdb_control_db_get_health(struct ctdb_context *ctdb,
				   TDB_DATA indata,
				   TDB_DATA *outdata)
{
	uint32_t db_id = *(uint32_t *)indata.dptr;
	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_id);
	char *reason;

	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (__location__ " Unknown db 0x%x \n ", db_id));
		return -1;
	}

	/* refresh the in-memory state from the health tdb */
	if (ctdb_load_persistent_health(ctdb, ctdb_db) != 0) {
		DEBUG(DEBUG_ERR, (__location__
				  " ctdb_load_persistent_health(%s) failed \n ",
				  ctdb_db->db_name));
		return -1;
	}

	*outdata = tdb_null;

	reason = ctdb_db->unhealthy_reason;
	if (reason != NULL) {
		outdata->dptr = (uint8_t *)reason;
		outdata->dsize = strlen(reason) + 1;
	}

	return 0;
}
2011-09-01 05:08:18 +04:00
int ctdb_set_db_readonly ( struct ctdb_context * ctdb , struct ctdb_db_context * ctdb_db )
{
char * ropath ;
if ( ctdb_db - > readonly ) {
return 0 ;
}
if ( ctdb_db - > persistent ) {
DEBUG ( DEBUG_ERR , ( " Trying to set persistent database with readonly property \n " ) ) ;
return - 1 ;
}
ropath = talloc_asprintf ( ctdb_db , " %s.RO " , ctdb_db - > db_path ) ;
if ( ropath = = NULL ) {
DEBUG ( DEBUG_CRIT , ( " Failed to asprintf the tracking database \n " ) ) ;
return - 1 ;
}
ctdb_db - > rottdb = tdb_open ( ropath ,
ctdb - > tunable . database_hash_size ,
TDB_NOLOCK | TDB_CLEAR_IF_FIRST | TDB_NOSYNC ,
O_CREAT | O_RDWR , 0 ) ;
if ( ctdb_db - > rottdb = = NULL ) {
DEBUG ( DEBUG_CRIT , ( " Failed to open/create the tracking database '%s' \n " , ropath ) ) ;
talloc_free ( ropath ) ;
return - 1 ;
}
DEBUG ( DEBUG_NOTICE , ( " OPENED tracking database : '%s' \n " , ropath ) ) ;
ctdb_db - > readonly = true ;
talloc_free ( ropath ) ;
return 0 ;
}
2007-06-07 16:06:19 +04:00
/*
2007-09-21 06:24:02 +04:00
attach to a database , handling both persistent and non - persistent databases
return 0 on success , - 1 on failure
2007-06-07 16:06:19 +04:00
*/
2009-11-29 14:39:37 +03:00
static int ctdb_local_attach ( struct ctdb_context * ctdb , const char * db_name ,
2010-10-25 04:31:12 +04:00
bool persistent , const char * unhealthy_reason ,
bool jenkinshash )
2007-06-07 16:06:19 +04:00
{
struct ctdb_db_context * ctdb_db , * tmp_db ;
int ret ;
2007-09-21 06:24:02 +04:00
struct TDB_DATA key ;
2008-07-04 11:32:21 +04:00
unsigned tdb_flags ;
2009-12-07 15:28:11 +03:00
int mode = 0600 ;
int remaining_tries = 0 ;
2007-06-07 16:06:19 +04:00
ctdb_db = talloc_zero ( ctdb , struct ctdb_db_context ) ;
CTDB_NO_MEMORY ( ctdb , ctdb_db ) ;
2009-10-10 07:26:09 +04:00
ctdb_db - > priority = 1 ;
2007-06-07 16:06:19 +04:00
ctdb_db - > ctdb = ctdb ;
ctdb_db - > db_name = talloc_strdup ( ctdb_db , db_name ) ;
CTDB_NO_MEMORY ( ctdb , ctdb_db - > db_name ) ;
2007-09-21 06:24:02 +04:00
key . dsize = strlen ( db_name ) + 1 ;
key . dptr = discard_const ( db_name ) ;
ctdb_db - > db_id = ctdb_hash ( & key ) ;
ctdb_db - > persistent = persistent ;
2007-06-07 16:06:19 +04:00
2010-12-17 03:38:09 +03:00
if ( ! ctdb_db - > persistent ) {
ctdb_db - > delete_queue = trbt_create ( ctdb_db , 0 ) ;
if ( ctdb_db - > delete_queue = = NULL ) {
CTDB_NO_MEMORY ( ctdb , ctdb_db - > delete_queue ) ;
}
2010-12-30 20:19:32 +03:00
ctdb_db - > ctdb_ltdb_store_fn = ctdb_ltdb_store_server ;
2010-12-17 03:38:09 +03:00
}
2007-06-07 16:06:19 +04:00
/* check for hash collisions */
for ( tmp_db = ctdb - > db_list ; tmp_db ; tmp_db = tmp_db - > next ) {
if ( tmp_db - > db_id = = ctdb_db - > db_id ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_CRIT , ( " db_id 0x%x hash collision. name1='%s' name2='%s' \n " ,
2007-06-07 16:06:19 +04:00
tmp_db - > db_id , db_name , tmp_db - > db_name ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
}
2009-12-07 15:28:11 +03:00
if ( persistent ) {
if ( unhealthy_reason ) {
ret = ctdb_update_persistent_health ( ctdb , ctdb_db ,
unhealthy_reason , 0 ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__ " ctdb_update_persistent_health('%s','%s') failed: %d \n " ,
ctdb_db - > db_name , unhealthy_reason , ret ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
}
if ( ctdb - > max_persistent_check_errors > 0 ) {
remaining_tries = 1 ;
}
if ( ctdb - > done_startup ) {
remaining_tries = 0 ;
}
ret = ctdb_load_persistent_health ( ctdb , ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__ " ctdb_load_persistent_health('%s') failed: %d \n " ,
ctdb_db - > db_name , ret ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
}
if ( ctdb_db - > unhealthy_reason & & remaining_tries = = 0 ) {
DEBUG ( DEBUG_ALERT , ( __location__ " ERROR: tdb %s is marked as unhealthy: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
if ( ctdb_db - > unhealthy_reason ) {
/* this is just a warning, but we want that in the log file! */
DEBUG ( DEBUG_ALERT , ( __location__ " Warning: tdb %s is marked as unhealthy: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
}
2007-06-07 16:06:19 +04:00
/* open the database */
ctdb_db - > db_path = talloc_asprintf ( ctdb_db , " %s/%s.%u " ,
2007-09-21 06:24:02 +04:00
persistent ? ctdb - > db_directory_persistent : ctdb - > db_directory ,
2007-09-04 04:06:36 +04:00
db_name , ctdb - > pnn ) ;
2007-06-07 16:06:19 +04:00
2008-07-04 11:32:21 +04:00
tdb_flags = persistent ? TDB_DEFAULT : TDB_CLEAR_IF_FIRST | TDB_NOSYNC ;
2009-12-16 13:29:15 +03:00
if ( ctdb - > valgrinding ) {
2008-07-04 11:32:21 +04:00
tdb_flags | = TDB_NOMMAP ;
}
2009-11-20 23:17:59 +03:00
tdb_flags | = TDB_DISALLOW_NESTING ;
2010-10-25 04:31:12 +04:00
if ( jenkinshash ) {
tdb_flags | = TDB_INCOMPATIBLE_HASH ;
}
2008-07-04 11:32:21 +04:00
2009-12-07 15:28:11 +03:00
again :
2007-06-17 17:31:44 +04:00
ctdb_db - > ltdb = tdb_wrap_open ( ctdb , ctdb_db - > db_path ,
ctdb - > tunable . database_hash_size ,
2008-07-04 11:32:21 +04:00
tdb_flags ,
2009-12-07 15:28:11 +03:00
O_CREAT | O_RDWR , mode ) ;
2007-06-07 16:06:19 +04:00
if ( ctdb_db - > ltdb = = NULL ) {
2009-12-07 15:28:11 +03:00
struct stat st ;
int saved_errno = errno ;
if ( ! persistent ) {
DEBUG ( DEBUG_CRIT , ( " Failed to open tdb '%s': %d - %s \n " ,
ctdb_db - > db_path ,
saved_errno ,
strerror ( saved_errno ) ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
if ( remaining_tries = = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" Failed to open persistent tdb '%s': %d - %s \n " ,
ctdb_db - > db_path ,
saved_errno ,
strerror ( saved_errno ) ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
ret = stat ( ctdb_db - > db_path , & st ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" Failed to open persistent tdb '%s': %d - %s \n " ,
ctdb_db - > db_path ,
saved_errno ,
strerror ( saved_errno ) ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
ret = ctdb_backup_corrupted_tdb ( ctdb , ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" Failed to open persistent tdb '%s': %d - %s \n " ,
ctdb_db - > db_path ,
saved_errno ,
strerror ( saved_errno ) ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
remaining_tries - - ;
mode = st . st_mode ;
goto again ;
2007-06-07 16:06:19 +04:00
}
2007-09-21 06:24:02 +04:00
if ( ! persistent ) {
ctdb_check_db_empty ( ctdb_db ) ;
2009-12-07 15:28:11 +03:00
} else {
ret = tdb_check ( ctdb_db - > ltdb - > tdb , NULL , NULL ) ;
if ( ret ! = 0 ) {
int fd ;
struct stat st ;
DEBUG ( DEBUG_CRIT , ( " tdb_check(%s) failed: %d - %s \n " ,
ctdb_db - > db_path , ret ,
tdb_errorstr ( ctdb_db - > ltdb - > tdb ) ) ) ;
if ( remaining_tries = = 0 ) {
talloc_free ( ctdb_db ) ;
return - 1 ;
}
fd = tdb_fd ( ctdb_db - > ltdb - > tdb ) ;
ret = fstat ( fd , & st ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( __location__
" Failed to fstat() persistent tdb '%s': %d - %s \n " ,
ctdb_db - > db_path ,
errno ,
strerror ( errno ) ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
/* close the TDB */
talloc_free ( ctdb_db - > ltdb ) ;
ctdb_db - > ltdb = NULL ;
ret = ctdb_backup_corrupted_tdb ( ctdb , ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( " Failed to backup corrupted tdb '%s' \n " ,
ctdb_db - > db_path ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
remaining_tries - - ;
mode = st . st_mode ;
goto again ;
}
2007-09-21 06:24:02 +04:00
}
2007-06-07 16:06:19 +04:00
2011-11-07 23:55:46 +04:00
/* set up a rb tree we can use to track which records we have a
fetch - lock in - flight for so we can defer any additional calls
for the same record .
*/
ctdb_db - > deferred_fetch = trbt_create ( ctdb_db , 0 ) ;
if ( ctdb_db - > deferred_fetch = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to create deferred fetch rb tree for ctdb database \n " ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
2007-06-07 16:06:19 +04:00
DLIST_ADD ( ctdb - > db_list , ctdb_db ) ;
2008-01-05 01:36:53 +03:00
/* setting this can help some high churn databases */
tdb_set_max_dead ( ctdb_db - > ltdb - > tdb , ctdb - > tunable . database_max_dead ) ;
2007-06-07 16:06:19 +04:00
/*
all databases support the " null " function . we need this in
order to do forced migration of records
*/
ret = ctdb_daemon_set_call ( ctdb , ctdb_db - > db_id , ctdb_null_func , CTDB_NULL_FUNC ) ;
if ( ret ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_CRIT , ( " Failed to setup null function for '%s' \n " , ctdb_db - > db_name ) ) ;
2007-06-07 16:06:19 +04:00
talloc_free ( ctdb_db ) ;
return - 1 ;
}
/*
all databases support the " fetch " function . we need this
for efficient Samba3 ctdb fetch
*/
ret = ctdb_daemon_set_call ( ctdb , ctdb_db - > db_id , ctdb_fetch_func , CTDB_FETCH_FUNC ) ;
if ( ret ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_CRIT , ( " Failed to setup fetch function for '%s' \n " , ctdb_db - > db_name ) ) ;
2007-06-07 16:06:19 +04:00
talloc_free ( ctdb_db ) ;
return - 1 ;
}
2007-09-21 06:24:02 +04:00
2011-07-20 05:27:05 +04:00
/*
all databases support the " fetch_with_header " function . we need this
for efficient readonly record fetches
*/
ret = ctdb_daemon_set_call ( ctdb , ctdb_db - > db_id , ctdb_fetch_with_header_func , CTDB_FETCH_WITH_HEADER_FUNC ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( " Failed to setup fetch function for '%s' \n " , ctdb_db - > db_name ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
}
2009-11-03 02:48:27 +03:00
ret = ctdb_vacuum_init ( ctdb_db ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( " Failed to setup vacuuming for "
" database '%s' \n " , ctdb_db - > db_name ) ) ;
talloc_free ( ctdb_db ) ;
return - 1 ;
2009-07-23 10:03:39 +04:00
}
2008-02-04 09:44:24 +03:00
DEBUG ( DEBUG_INFO , ( " Attached to database '%s' \n " , ctdb_db - > db_path ) ) ;
2007-06-07 16:06:19 +04:00
2007-09-21 06:24:02 +04:00
/* success */
return 0 ;
}
2011-02-23 07:46:36 +03:00
/* one deferred attach request, linked into ctdb->deferred_attach */
struct ctdb_deferred_attach_context {
	/* doubly linked list pointers */
	struct ctdb_deferred_attach_context *next;
	struct ctdb_deferred_attach_context *prev;

	/* daemon context owning the list */
	struct ctdb_context *ctdb;

	/* the deferred control request */
	struct ctdb_req_control *c;
};
static int ctdb_deferred_attach_destructor ( struct ctdb_deferred_attach_context * da_ctx )
{
DLIST_REMOVE ( da_ctx - > ctdb - > deferred_attach , da_ctx ) ;
return 0 ;
}
static void ctdb_deferred_attach_timeout ( struct event_context * ev , struct timed_event * te , struct timeval t , void * private_data )
{
struct ctdb_deferred_attach_context * da_ctx = talloc_get_type ( private_data , struct ctdb_deferred_attach_context ) ;
struct ctdb_context * ctdb = da_ctx - > ctdb ;
ctdb_request_control_reply ( ctdb , da_ctx - > c , NULL , - 1 , NULL ) ;
talloc_free ( da_ctx ) ;
}
static void ctdb_deferred_attach_callback ( struct event_context * ev , struct timed_event * te , struct timeval t , void * private_data )
{
struct ctdb_deferred_attach_context * da_ctx = talloc_get_type ( private_data , struct ctdb_deferred_attach_context ) ;
struct ctdb_context * ctdb = da_ctx - > ctdb ;
/* This talloc-steals the packet ->c */
ctdb_input_pkt ( ctdb , ( struct ctdb_req_header * ) da_ctx - > c ) ;
talloc_free ( da_ctx ) ;
}
int ctdb_process_deferred_attach ( struct ctdb_context * ctdb )
{
struct ctdb_deferred_attach_context * da_ctx ;
/* call it from the main event loop as soon as the current event
finishes .
*/
while ( ( da_ctx = ctdb - > deferred_attach ) ! = NULL ) {
DLIST_REMOVE ( ctdb - > deferred_attach , da_ctx ) ;
2011-03-16 06:55:58 +03:00
event_add_timed ( ctdb - > ev , da_ctx , timeval_current_ofs ( 1 , 0 ) , ctdb_deferred_attach_callback , da_ctx ) ;
2011-02-23 07:46:36 +03:00
}
return 0 ;
}
2007-09-21 06:24:02 +04:00
/*
  a client has asked to attach a new database

  indata       the database name; it must be a NUL terminated string
  outdata      on success points at the db_id of the attached database
  tdb_flags    extra tdb flags requested by the client; only TDB_NOSYNC
               and TDB_INCOMPATIBLE_HASH are honoured
  persistent   whether a persistent database is requested
  client_id    reqid used to look up the local client, 0 if there is none
  c            the control request; it is talloc-stolen when the attach
               is deferred
  async_reply  set to true when the reply is deferred

  returns 0 on success (or when the request was deferred), -1 on failure
 */
int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
			       TDB_DATA *outdata, uint64_t tdb_flags,
			       bool persistent, uint32_t client_id,
			       struct ctdb_req_control *c,
			       bool *async_reply)
{
	const char *db_name = (const char *)indata.dptr;
	struct ctdb_db_context *db;
	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
	struct ctdb_client *client = NULL;

	/* the name is used as a C string below (log messages, name lookup),
	   so refuse payloads that are empty or not NUL terminated */
	if (indata.dptr == NULL || indata.dsize == 0
	    || indata.dptr[indata.dsize - 1] != '\0') {
		DEBUG(DEBUG_ERR, ("DB Attach refused: database name is missing or not NUL terminated\n"));
		return -1;
	}

	if (ctdb->tunable.allow_client_db_attach == 0) {
		DEBUG(DEBUG_ERR, (" DB Attach to database %s denied by tunable "
				  " AllowClientDBAccess == 0 \n ", db_name));
		return -1;
	}

	/* dont allow any local clients to attach while we are in recovery mode
	 * except for the recovery daemon.
	 * allow all attach from the network since these are always from remote
	 * recovery daemons.
	 */
	if (client_id != 0) {
		client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	}
	if (client != NULL) {
		/* If the node is inactive it is not part of the cluster
		   and we should not allow clients to attach to any
		   databases
		*/
		if (node->flags & NODE_FLAGS_INACTIVE) {
			DEBUG(DEBUG_ERR, (" DB Attach to database %s refused since node is inactive (disconnected or banned) \n ", db_name));
			return -1;
		}

		if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE
		    && client->pid != ctdb->recoverd_pid
		    && !ctdb->done_startup) {
			struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context);

			if (da_ctx == NULL) {
				DEBUG(DEBUG_ERR, (" DB Attach to database %s deferral for client with pid:%d failed due to OOM. \n ", db_name, client->pid));
				return -1;
			}

			/* park the request: it is either replayed through
			   ctdb_process_deferred_attach() or answered by the
			   timeout handler */
			da_ctx->ctdb = ctdb;
			da_ctx->c = talloc_steal(da_ctx, c);
			talloc_set_destructor(da_ctx, ctdb_deferred_attach_destructor);
			DLIST_ADD(ctdb->deferred_attach, da_ctx);

			event_add_timed(ctdb->ev, da_ctx, timeval_current_ofs(ctdb->tunable.deferred_attach_timeout, 0), ctdb_deferred_attach_timeout, da_ctx);

			DEBUG(DEBUG_ERR, (" DB Attach to database %s deferred for client with pid:%d since node is in recovery mode. \n ", db_name, client->pid));
			*async_reply = true;
			return 0;
		}
	}

	/* the client can optionally pass additional tdb flags, but we
	   only allow a subset of those on the database in ctdb. Note
	   that tdb_flags is passed in via the (otherwise unused)
	   srvid to the attach control */
	tdb_flags &= (TDB_NOSYNC | TDB_INCOMPATIBLE_HASH);

	/* see if we already have this name */
	db = ctdb_db_handle(ctdb, db_name);
	if (db) {
		if (db->persistent != persistent) {
			DEBUG(DEBUG_ERR, (" ERROR: DB Attach %spersistent to %spersistent "
					  " database %s \n ", persistent ? " " : " non- ",
					  db->persistent ? " " : " non- ", db_name));
			return -1;
		}
		outdata->dptr  = (uint8_t *)&db->db_id;
		outdata->dsize = sizeof(db->db_id);
		tdb_add_flags(db->ltdb->tdb, tdb_flags);
		return 0;
	}

	if (ctdb_local_attach(ctdb, db_name, persistent, NULL, (tdb_flags & TDB_INCOMPATIBLE_HASH) ? true : false) != 0) {
		return -1;
	}

	db = ctdb_db_handle(ctdb, db_name);
	if (!db) {
		DEBUG(DEBUG_ERR, (" Failed to find db handle for name '%s' \n ", db_name));
		return -1;
	}

	/* remember the flags the client has specified */
	tdb_add_flags(db->ltdb->tdb, tdb_flags);

	outdata->dptr  = (uint8_t *)&db->db_id;
	outdata->dsize = sizeof(db->db_id);

	/* Try to ensure it's locked in mem */
	ctdb_lockdown_memory(ctdb);

	/* tell all the other nodes about this database */
	ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, tdb_flags,
				 persistent ? CTDB_CONTROL_DB_ATTACH_PERSISTENT :
					      CTDB_CONTROL_DB_ATTACH,
				 0, CTDB_CTRL_FLAG_NOREPLY,
				 indata, NULL, NULL);

	/* success */
	return 0;
}
/*
attach to all existing persistent databases
*/
2009-11-29 14:39:37 +03:00
static int ctdb_attach_persistent ( struct ctdb_context * ctdb ,
const char * unhealthy_reason )
2007-09-21 06:24:02 +04:00
{
DIR * d ;
struct dirent * de ;
/* open the persistent db directory and scan it for files */
d = opendir ( ctdb - > db_directory_persistent ) ;
if ( d = = NULL ) {
return 0 ;
}
while ( ( de = readdir ( d ) ) ) {
2009-10-28 14:54:29 +03:00
char * p , * s , * q ;
2007-09-21 06:24:02 +04:00
size_t len = strlen ( de - > d_name ) ;
uint32_t node ;
2009-10-28 14:54:29 +03:00
int invalid_name = 0 ;
2007-09-21 06:24:02 +04:00
s = talloc_strdup ( ctdb , de - > d_name ) ;
CTDB_NO_MEMORY ( ctdb , s ) ;
/* only accept names ending in .tdb */
p = strstr ( s , " .tdb. " ) ;
if ( len < 7 | | p = = NULL ) {
talloc_free ( s ) ;
continue ;
}
2009-10-28 14:54:29 +03:00
/* only accept names ending with .tdb. and any number of digits */
q = p + 5 ;
while ( * q ! = 0 & & invalid_name = = 0 ) {
if ( ! isdigit ( * q + + ) ) {
invalid_name = 1 ;
}
}
if ( invalid_name = = 1 | | sscanf ( p + 5 , " %u " , & node ) ! = 1 | | node ! = ctdb - > pnn ) {
DEBUG ( DEBUG_ERR , ( " Ignoring persistent database '%s' \n " , de - > d_name ) ) ;
2007-09-21 06:24:02 +04:00
talloc_free ( s ) ;
continue ;
}
p [ 4 ] = 0 ;
2010-10-25 04:31:12 +04:00
if ( ctdb_local_attach ( ctdb , s , true , unhealthy_reason , 0 ) ! = 0 ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Failed to attach to persistent database '%s' \n " , de - > d_name ) ) ;
2007-09-21 06:24:02 +04:00
closedir ( d ) ;
talloc_free ( s ) ;
return - 1 ;
}
2009-11-29 14:39:37 +03:00
2009-12-02 06:53:21 +03:00
DEBUG ( DEBUG_INFO , ( " Attached to persistent database %s \n " , s ) ) ;
2007-09-21 06:24:02 +04:00
talloc_free ( s ) ;
}
closedir ( d ) ;
2007-06-07 16:06:19 +04:00
return 0 ;
}
2009-11-29 14:39:23 +03:00
int ctdb_attach_databases ( struct ctdb_context * ctdb )
{
int ret ;
2009-11-29 14:39:37 +03:00
char * persistent_health_path = NULL ;
char * unhealthy_reason = NULL ;
bool first_try = true ;
2009-11-29 14:39:23 +03:00
if ( ctdb - > db_directory = = NULL ) {
ctdb - > db_directory = VARDIR " /ctdb " ;
}
if ( ctdb - > db_directory_persistent = = NULL ) {
ctdb - > db_directory_persistent = VARDIR " /ctdb/persistent " ;
}
if ( ctdb - > db_directory_state = = NULL ) {
ctdb - > db_directory_state = VARDIR " /ctdb/state " ;
}
/* make sure the db directory exists */
ret = mkdir ( ctdb - > db_directory , 0700 ) ;
if ( ret = = - 1 & & errno ! = EEXIST ) {
DEBUG ( DEBUG_CRIT , ( __location__ " Unable to create ctdb directory '%s' \n " ,
ctdb - > db_directory ) ) ;
return - 1 ;
}
/* make sure the persistent db directory exists */
ret = mkdir ( ctdb - > db_directory_persistent , 0700 ) ;
if ( ret = = - 1 & & errno ! = EEXIST ) {
DEBUG ( DEBUG_CRIT , ( __location__ " Unable to create ctdb persistent directory '%s' \n " ,
ctdb - > db_directory_persistent ) ) ;
return - 1 ;
}
/* make sure the internal state db directory exists */
ret = mkdir ( ctdb - > db_directory_state , 0700 ) ;
if ( ret = = - 1 & & errno ! = EEXIST ) {
DEBUG ( DEBUG_CRIT , ( __location__ " Unable to create ctdb state directory '%s' \n " ,
ctdb - > db_directory_state ) ) ;
return - 1 ;
}
2009-11-29 14:39:37 +03:00
persistent_health_path = talloc_asprintf ( ctdb , " %s/%s.%u " ,
ctdb - > db_directory_state ,
PERSISTENT_HEALTH_TDB ,
ctdb - > pnn ) ;
if ( persistent_health_path = = NULL ) {
DEBUG ( DEBUG_CRIT , ( __location__ " talloc_asprintf() failed \n " ) ) ;
return - 1 ;
}
again :
ctdb - > db_persistent_health = tdb_wrap_open ( ctdb , persistent_health_path ,
0 , TDB_DISALLOW_NESTING ,
O_CREAT | O_RDWR , 0600 ) ;
if ( ctdb - > db_persistent_health = = NULL ) {
struct tdb_wrap * tdb ;
if ( ! first_try ) {
DEBUG ( DEBUG_CRIT , ( " Failed to open tdb '%s': %d - %s \n " ,
persistent_health_path ,
errno ,
strerror ( errno ) ) ) ;
talloc_free ( persistent_health_path ) ;
talloc_free ( unhealthy_reason ) ;
return - 1 ;
}
first_try = false ;
unhealthy_reason = talloc_asprintf ( ctdb , " WARNING - '%s' %s - %s " ,
persistent_health_path ,
" was cleared after a failure " ,
" manual verification needed " ) ;
if ( unhealthy_reason = = NULL ) {
DEBUG ( DEBUG_CRIT , ( __location__ " talloc_asprintf() failed \n " ) ) ;
talloc_free ( persistent_health_path ) ;
return - 1 ;
}
DEBUG ( DEBUG_CRIT , ( " Failed to open tdb '%s' - retrying after CLEAR_IF_FIRST \n " ,
persistent_health_path ) ) ;
tdb = tdb_wrap_open ( ctdb , persistent_health_path ,
0 , TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING ,
O_CREAT | O_RDWR , 0600 ) ;
if ( tdb ) {
DEBUG ( DEBUG_CRIT , ( " Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s \n " ,
persistent_health_path ,
errno ,
strerror ( errno ) ) ) ;
talloc_free ( persistent_health_path ) ;
talloc_free ( unhealthy_reason ) ;
return - 1 ;
}
talloc_free ( tdb ) ;
goto again ;
}
ret = tdb_check ( ctdb - > db_persistent_health - > tdb , NULL , NULL ) ;
if ( ret ! = 0 ) {
struct tdb_wrap * tdb ;
talloc_free ( ctdb - > db_persistent_health ) ;
ctdb - > db_persistent_health = NULL ;
if ( ! first_try ) {
DEBUG ( DEBUG_CRIT , ( " tdb_check('%s') failed \n " ,
persistent_health_path ) ) ;
talloc_free ( persistent_health_path ) ;
talloc_free ( unhealthy_reason ) ;
return - 1 ;
}
first_try = false ;
unhealthy_reason = talloc_asprintf ( ctdb , " WARNING - '%s' %s - %s " ,
persistent_health_path ,
" was cleared after a failure " ,
" manual verification needed " ) ;
if ( unhealthy_reason = = NULL ) {
DEBUG ( DEBUG_CRIT , ( __location__ " talloc_asprintf() failed \n " ) ) ;
talloc_free ( persistent_health_path ) ;
return - 1 ;
}
DEBUG ( DEBUG_CRIT , ( " tdb_check('%s') failed - retrying after CLEAR_IF_FIRST \n " ,
persistent_health_path ) ) ;
tdb = tdb_wrap_open ( ctdb , persistent_health_path ,
0 , TDB_CLEAR_IF_FIRST | TDB_DISALLOW_NESTING ,
O_CREAT | O_RDWR , 0600 ) ;
if ( tdb ) {
DEBUG ( DEBUG_CRIT , ( " Failed to open tdb '%s' - with CLEAR_IF_FIRST: %d - %s \n " ,
persistent_health_path ,
errno ,
strerror ( errno ) ) ) ;
talloc_free ( persistent_health_path ) ;
talloc_free ( unhealthy_reason ) ;
return - 1 ;
}
talloc_free ( tdb ) ;
goto again ;
}
talloc_free ( persistent_health_path ) ;
ret = ctdb_attach_persistent ( ctdb , unhealthy_reason ) ;
talloc_free ( unhealthy_reason ) ;
2009-11-29 14:39:23 +03:00
if ( ret ! = 0 ) {
return ret ;
}
return 0 ;
}
2007-06-07 16:06:19 +04:00
/*
called when a broadcast seqnum update comes in
*/
int32_t ctdb_ltdb_update_seqnum ( struct ctdb_context * ctdb , uint32_t db_id , uint32_t srcnode )
{
struct ctdb_db_context * ctdb_db ;
2007-09-04 04:06:36 +04:00
if ( srcnode = = ctdb - > pnn ) {
2007-06-07 16:06:19 +04:00
/* don't update ourselves! */
return 0 ;
}
ctdb_db = find_ctdb_db ( ctdb , db_id ) ;
if ( ! ctdb_db ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Unknown db_id 0x%x in ctdb_ltdb_update_seqnum \n " , db_id ) ) ;
2007-06-07 16:06:19 +04:00
return - 1 ;
}
2009-12-07 15:28:11 +03:00
if ( ctdb_db - > unhealthy_reason ) {
DEBUG ( DEBUG_ERR , ( " db(%s) unhealty in ctdb_ltdb_update_seqnum: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
return - 1 ;
}
2007-06-07 16:06:19 +04:00
tdb_increment_seqnum_nonblock ( ctdb_db - > ltdb - > tdb ) ;
ctdb_db - > seqnum = tdb_get_seqnum ( ctdb_db - > ltdb - > tdb ) ;
return 0 ;
}
/*
timer to check for seqnum changes in a ltdb and propogate them
*/
static void ctdb_ltdb_seqnum_check ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * p )
{
struct ctdb_db_context * ctdb_db = talloc_get_type ( p , struct ctdb_db_context ) ;
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
uint32_t new_seqnum = tdb_get_seqnum ( ctdb_db - > ltdb - > tdb ) ;
if ( new_seqnum ! = ctdb_db - > seqnum ) {
/* something has changed - propogate it */
TDB_DATA data ;
data . dptr = ( uint8_t * ) & ctdb_db - > db_id ;
data . dsize = sizeof ( uint32_t ) ;
ctdb_daemon_send_control ( ctdb , CTDB_BROADCAST_VNNMAP , 0 ,
CTDB_CONTROL_UPDATE_SEQNUM , 0 , CTDB_CTRL_FLAG_NOREPLY ,
data , NULL , NULL ) ;
}
ctdb_db - > seqnum = new_seqnum ;
/* setup a new timer */
2009-12-08 19:00:55 +03:00
ctdb_db - > seqnum_update =
2007-06-07 16:06:19 +04:00
event_add_timed ( ctdb - > ev , ctdb_db ,
2009-04-01 10:21:38 +04:00
timeval_current_ofs ( ctdb - > tunable . seqnum_interval / 1000 , ( ctdb - > tunable . seqnum_interval % 1000 ) * 1000 ) ,
2007-06-07 16:06:19 +04:00
ctdb_ltdb_seqnum_check , ctdb_db ) ;
}
/*
enable seqnum handling on this db
*/
int32_t ctdb_ltdb_enable_seqnum ( struct ctdb_context * ctdb , uint32_t db_id )
{
struct ctdb_db_context * ctdb_db ;
ctdb_db = find_ctdb_db ( ctdb , db_id ) ;
if ( ! ctdb_db ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Unknown db_id 0x%x in ctdb_ltdb_enable_seqnum \n " , db_id ) ) ;
2007-06-07 16:06:19 +04:00
return - 1 ;
}
2009-12-08 19:00:55 +03:00
if ( ctdb_db - > seqnum_update = = NULL ) {
ctdb_db - > seqnum_update =
2007-06-07 16:06:19 +04:00
event_add_timed ( ctdb - > ev , ctdb_db ,
2009-04-01 10:21:38 +04:00
timeval_current_ofs ( ctdb - > tunable . seqnum_interval / 1000 , ( ctdb - > tunable . seqnum_interval % 1000 ) * 1000 ) ,
2007-06-07 16:06:19 +04:00
ctdb_ltdb_seqnum_check , ctdb_db ) ;
}
tdb_enable_seqnum ( ctdb_db - > ltdb - > tdb ) ;
ctdb_db - > seqnum = tdb_get_seqnum ( ctdb_db - > ltdb - > tdb ) ;
return 0 ;
}
2009-10-10 07:26:09 +04:00
/*
  set the priority of a database

  indata carries a struct ctdb_db_priority holding the db_id and the new
  priority, which must lie in 1..NUM_DB_PRIORITIES

  an unknown db_id or an out of range priority is logged and otherwise
  ignored, so the function returns 0 in every case
 */
int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_db_priority *db_prio = (struct ctdb_db_priority *)indata.dptr;
	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, db_prio->db_id);
	bool priority_in_range;

	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR, (" Unknown db_id 0x%x in ctdb_set_db_priority \n ", db_prio->db_id));
		return 0;
	}

	priority_in_range = (db_prio->priority >= 1)
			    && (db_prio->priority <= NUM_DB_PRIORITIES);
	if (!priority_in_range) {
		DEBUG(DEBUG_ERR, (" Trying to set invalid priority : %u \n ", db_prio->priority));
		return 0;
	}

	ctdb_db->priority = db_prio->priority;
	DEBUG(DEBUG_INFO, (" Setting DB priority to %u for db 0x%08x \n ", db_prio->priority, db_prio->db_id));

	return 0;
}
2012-03-20 09:58:35 +04:00
int ctdb_set_db_sticky ( struct ctdb_context * ctdb , struct ctdb_db_context * ctdb_db )
{
DEBUG ( DEBUG_NOTICE , ( " set db sticky %s \n " , ctdb_db - > db_name ) ) ;
if ( ctdb_db - > sticky ) {
return 0 ;
}
if ( ctdb_db - > persistent ) {
DEBUG ( DEBUG_ERR , ( " Trying to set persistent database with sticky property \n " ) ) ;
return - 1 ;
}
ctdb_db - > sticky_records = trbt_create ( ctdb_db , 0 ) ;
ctdb_db - > sticky = true ;
return 0 ;
}
2012-02-08 08:29:27 +04:00
/*
  return the statistics of a database in wire format

  a struct ctdb_db_statistics_wire is allocated on outdata and filled
  with the ro delegation/revoke counters, the hop count buckets and
  MAX_HOT_KEYS hot key records. Each hot key record is packed as
    uint32_t count, uint32_t key length, key bytes
  directly behind the previous one, starting at the hot_keys member

  returns 0 on success with outdata describing the buffer, -1 when db_id
  is unknown or the allocation fails
 */
int32_t ctdb_control_get_db_statistics(struct ctdb_context *ctdb,
				uint32_t db_id,
				TDB_DATA *outdata)
{
	struct ctdb_db_context *ctdb_db;
	struct ctdb_db_statistics_wire *stats;
	int i;
	int len;
	char *ptr;

	ctdb_db = find_ctdb_db(ctdb, db_id);
	if (!ctdb_db) {
		DEBUG(DEBUG_ERR, (" Unknown db_id 0x%x in get_db_statistics \n ", db_id));
		return -1;
	}

	/* fixed part plus, per hot key, two uint32_t fields and the key */
	len = offsetof(struct ctdb_db_statistics_wire, hot_keys);
	for (i = 0; i < MAX_HOT_KEYS; i++) {
		len += 8 + ctdb_db->statistics.hot_keys[i].key.dsize;
	}

	stats = talloc_size(outdata, len);
	if (stats == NULL) {
		DEBUG(DEBUG_ERR, (" Failed to allocate db statistics wire structure \n "));
		return -1;
	}

	stats->db_ro_delegations = ctdb_db->statistics.db_ro_delegations;
	stats->db_ro_revokes     = ctdb_db->statistics.db_ro_revokes;
	for (i = 0; i < MAX_COUNT_BUCKETS; i++) {
		stats->hop_count_bucket[i] = ctdb_db->statistics.hop_count_bucket[i];
	}

	stats->num_hot_keys = MAX_HOT_KEYS;

	ptr = (char *)&stats->hot_keys[0];
	for (i = 0; i < MAX_HOT_KEYS; i++) {
		size_t key_size = ctdb_db->statistics.hot_keys[i].key.dsize;
		uint32_t wire_count = ctdb_db->statistics.hot_keys[i].count;
		uint32_t wire_size = key_size;

		/* ptr is not guaranteed to be 4 byte aligned once a key of
		   arbitrary length has been packed, so copy the fields
		   byte-wise instead of storing through a uint32_t pointer */
		memcpy(ptr, &wire_count, sizeof(wire_count));
		ptr += sizeof(wire_count);

		memcpy(ptr, &wire_size, sizeof(wire_size));
		ptr += sizeof(wire_size);

		/* an empty key may come with a NULL dptr, which must not be
		   handed to memcpy() */
		if (key_size > 0) {
			memcpy(ptr, ctdb_db->statistics.hot_keys[i].key.dptr, key_size);
		}
		ptr += key_size;
	}

	outdata->dptr  = (uint8_t *)stats;
	outdata->dsize = len;

	return 0;
}