2012-10-11 04:29:29 +04:00
/*
ctdb lock handling
provide API to do non - blocking locks for single or all databases
Copyright ( C ) Amitay Isaacs 2012
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , see < http : //www.gnu.org/licenses/>.
*/
# include "includes.h"
# include "include/ctdb_private.h"
# include "include/ctdb_protocol.h"
# include "tevent.h"
# include "tdb.h"
2014-08-15 09:46:33 +04:00
# include "lib/tdb_wrap/tdb_wrap.h"
2012-10-11 04:29:29 +04:00
# include "system/filesys.h"
# include "lib/util/dlinklist.h"
2015-10-23 06:11:53 +03:00
# include "common/system.h"
2012-10-11 04:29:29 +04:00
/*
* Non - blocking Locking API
*
* 1. Create a child process to do blocking locks .
* 2. Once the locks are obtained , signal parent process via fd .
* 3. Invoke registered callback routine with locking status .
* 4. If the child process cannot get locks within certain time ,
2014-07-23 06:52:03 +04:00
* execute an external script to debug .
2012-10-11 04:29:29 +04:00
*
* ctdb_lock_record ( ) - get a lock on a record
* ctdb_lock_db ( ) - get a lock on a DB
* ctdb_lock_alldb_prio ( ) - get a lock on all DBs with given priority
* ctdb_lock_alldb ( ) - get a lock on all DBs
*
* auto_mark - whether to mark / unmark DBs in before / after callback
2015-06-02 06:15:37 +03:00
* = false is used for freezing databases for
* recovery since the recovery cannot start till
* databases are locked on all the nodes .
* = true is used for record locks .
2012-10-11 04:29:29 +04:00
*/
/*
 * Kinds of lock that can be requested.
 *
 * The numeric values are used as indices into lock_type_str[], so the
 * two definitions have to be kept in step.
 */
enum lock_type {
	LOCK_RECORD     = 0,
	LOCK_DB         = 1,
	LOCK_ALLDB_PRIO = 2,
	LOCK_ALLDB      = 3,
};

/* Printable name for each lock type, indexed by enum lock_type */
static const char * const lock_type_str[] = {
	[LOCK_RECORD]     = " lock_record ",
	[LOCK_DB]         = " lock_db ",
	[LOCK_ALLDB_PRIO] = " lock_alldb_prio ",
	[LOCK_ALLDB]      = " lock_alldb ",
};
2012-10-11 04:29:29 +04:00
struct lock_request ;
/*
 * lock_context is the common part of a lock request: it describes what
 * has to be locked and tracks the helper process doing the locking.
 */
struct lock_context {
	/* Linkage for the lock_pending / lock_current lists */
	struct lock_context *next;
	struct lock_context *prev;

	/* What kind of lock is wanted */
	enum lock_type type;

	/* Daemon context this lock belongs to */
	struct ctdb_context *ctdb;

	/* Database to lock; NULL for the "all databases" lock types */
	struct ctdb_db_context *ctdb_db;

	/* Private copy of the record key; dsize is 0 when there is no key */
	TDB_DATA key;

	/* Database priority, as passed in by the requester */
	uint32_t priority;

	/* Mark/unmark the lock around the callback (see process_callbacks) */
	bool auto_mark;

	/* Client part of the request; NULL once it has been detached */
	struct lock_request *request;

	/* Pid of the lock helper, -1 while none is running */
	pid_t child;

	/*
	 * Pipe to the lock helper: fd[0] is read by the daemon,
	 * fd[1] is handed to the helper on its command line.
	 */
	int fd[2];

	/* Event watching fd[0] for the helper's status byte */
	struct tevent_fd *tfd;

	/* Timer used to warn about locks that take too long */
	struct tevent_timer *ttimer;

	/* When the request was created; basis for latency accounting */
	struct timeval start_time;

	/* ctdb_hash() of the key; only filled in for non-empty keys */
	uint32_t key_hash;

	/* Not referenced by the code visible in this section of the file */
	bool can_schedule;
};
/*
 * lock_request is the client specific part of a lock request.  It is the
 * handle returned to the caller of the ctdb_lock_*() functions.
 */
struct lock_request {
	/* Lock context serving this request; NULL once detached from it */
	struct lock_context *lctx;

	/* Called with private_data and the lock result (true == locked) */
	void (*callback)(void *, bool);

	/* Opaque pointer passed through to the callback */
	void *private_data;
};
2012-07-11 09:15:41 +04:00
/*
* Support samba 3.6 . x ( and older ) versions which do not set db priority .
*
* By default , all databases are set to priority 1. So only when priority
* is set to 1 , check for databases that need higher priority .
*/
2013-01-04 07:32:55 +04:00
static bool later_db ( struct ctdb_context * ctdb , const char * name )
2012-07-11 09:15:41 +04:00
{
2013-01-04 07:32:55 +04:00
if ( ctdb - > tunable . samba3_hack = = 0 ) {
return false ;
}
2012-07-11 09:15:41 +04:00
if ( strstr ( name , " brlock " ) | |
strstr ( name , " g_lock " ) | |
strstr ( name , " notify_onelevel " ) | |
strstr ( name , " serverid " ) | |
strstr ( name , " xattr_tdb " ) ) {
return true ;
}
return false ;
}
2014-08-05 10:45:34 +04:00
/*
 * Call handler(ctdb_db, private_data) for the databases of one priority.
 *
 * First pass: every database whose priority matches and which is not
 * selected by later_db().  For priority 1 only, a second pass then visits
 * every database selected by later_db() (whatever its priority), so those
 * are always handled after the others.
 *
 * Returns 0 when every handler call returned 0, -1 as soon as one fails.
 */
int ctdb_db_prio_iterator(struct ctdb_context *ctdb, uint32_t priority,
			  ctdb_db_handler_t handler, void *private_data)
{
	struct ctdb_db_context *ctdb_db;

	ctdb_db = ctdb->db_list;
	while (ctdb_db != NULL) {
		if (ctdb_db->priority == priority &&
		    !later_db(ctdb, ctdb_db->db_name)) {
			if (handler(ctdb_db, private_data) != 0) {
				return -1;
			}
		}
		ctdb_db = ctdb_db->next;
	}

	/* If priority != 1, later_db check is not required and can return */
	if (priority != 1) {
		return 0;
	}

	ctdb_db = ctdb->db_list;
	while (ctdb_db != NULL) {
		if (later_db(ctdb, ctdb_db->db_name)) {
			if (handler(ctdb_db, private_data) != 0) {
				return -1;
			}
		}
		ctdb_db = ctdb_db->next;
	}

	return 0;
}
2014-08-05 10:49:06 +04:00
/*
 * Call handler(ctdb_db, private_data) for every database on ctdb->db_list,
 * in list order.
 *
 * Returns 0 when every handler call returned 0, -1 as soon as one fails
 * (the remaining databases are then not visited).
 */
int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
		     void *private_data)
{
	struct ctdb_db_context *ctdb_db = ctdb->db_list;

	while (ctdb_db != NULL) {
		if (handler(ctdb_db, private_data) != 0) {
			return -1;
		}
		ctdb_db = ctdb_db->next;
	}

	return 0;
}
2013-04-30 07:23:59 +04:00
2012-10-11 04:29:29 +04:00
/*
* lock all databases - mark only
*/
2014-08-05 10:37:43 +04:00
static int db_lock_mark_handler ( struct ctdb_db_context * ctdb_db ,
2013-04-30 08:07:11 +04:00
void * private_data )
{
int tdb_transaction_write_lock_mark ( struct tdb_context * ) ;
2014-08-05 10:37:43 +04:00
DEBUG ( DEBUG_INFO , ( " marking locked database %s \n " , ctdb_db - > db_name ) ) ;
2013-04-30 08:07:11 +04:00
if ( tdb_transaction_write_lock_mark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to mark (transaction lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
if ( tdb_lockall_mark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to mark (all lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
return 0 ;
}
2015-09-10 06:24:43 +03:00
/*
 * Mark a single database as locked.
 *
 * Refused (returns -1) unless ctdb_db_frozen() reports true for the
 * database; otherwise returns the result of db_lock_mark_handler().
 */
int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db)
{
	if (ctdb_db_frozen(ctdb_db)) {
		return db_lock_mark_handler(ctdb_db, NULL);
	}

	DEBUG(DEBUG_ERR,
	      (" Attempt to mark database locked when not frozen \n "));
	return -1;
}
2012-10-11 04:29:29 +04:00
int ctdb_lockall_mark_prio ( struct ctdb_context * ctdb , uint32_t priority )
{
/*
* This function is only used by the main dameon during recovery .
* At this stage , the databases have already been locked , by a
2014-08-21 06:32:02 +04:00
* dedicated child process .
2012-10-11 04:29:29 +04:00
*/
2014-08-21 06:32:02 +04:00
if ( ! ctdb_db_prio_frozen ( ctdb , priority ) ) {
2012-10-11 04:29:29 +04:00
DEBUG ( DEBUG_ERR , ( " Attempt to mark all databases locked when not frozen \n " ) ) ;
return - 1 ;
}
2014-08-05 10:43:11 +04:00
return ctdb_db_prio_iterator ( ctdb , priority , db_lock_mark_handler , NULL ) ;
2012-10-11 04:29:29 +04:00
}
/*
 * Mark the databases of every priority as locked, walking the priorities
 * upwards from 1 to NUM_DB_PRIORITIES inclusive.
 *
 * Returns 0 on success, -1 as soon as marking fails for one priority.
 */
static int ctdb_lockall_mark(struct ctdb_context *ctdb)
{
	uint32_t priority = 1;

	while (priority <= NUM_DB_PRIORITIES) {
		if (ctdb_db_prio_iterator(ctdb, priority,
					  db_lock_mark_handler, NULL) != 0) {
			return -1;
		}
		priority++;
	}

	return 0;
}
/*
* lock all databases - unmark only
*/
2014-08-05 10:37:43 +04:00
static int db_lock_unmark_handler ( struct ctdb_db_context * ctdb_db ,
2013-04-30 08:16:07 +04:00
void * private_data )
{
int tdb_transaction_write_lock_unmark ( struct tdb_context * ) ;
2014-08-05 10:37:43 +04:00
DEBUG ( DEBUG_INFO , ( " unmarking locked database %s \n " , ctdb_db - > db_name ) ) ;
2013-04-30 08:16:07 +04:00
if ( tdb_transaction_write_lock_unmark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to unmark (transaction lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
if ( tdb_lockall_unmark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to unmark (all lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
return 0 ;
}
2015-09-10 06:24:43 +03:00
/*
 * Remove the "locked" mark from a single database.
 *
 * Refused (returns -1) unless ctdb_db_frozen() reports true for the
 * database; otherwise returns the result of db_lock_unmark_handler().
 */
int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db)
{
	if (ctdb_db_frozen(ctdb_db)) {
		return db_lock_unmark_handler(ctdb_db, NULL);
	}

	DEBUG(DEBUG_ERR,
	      (" Attempt to unmark database locked when not frozen \n "));
	return -1;
}
2012-10-11 04:29:29 +04:00
int ctdb_lockall_unmark_prio ( struct ctdb_context * ctdb , uint32_t priority )
{
/*
2014-02-07 17:11:19 +04:00
* This function is only used by the main daemon during recovery .
2012-10-11 04:29:29 +04:00
* At this stage , the databases have already been locked , by a
2014-08-21 06:32:02 +04:00
* dedicated child process .
2012-10-11 04:29:29 +04:00
*/
2014-08-21 06:32:02 +04:00
if ( ! ctdb_db_prio_frozen ( ctdb , priority ) ) {
2012-10-11 04:29:29 +04:00
DEBUG ( DEBUG_ERR , ( " Attempt to unmark all databases locked when not frozen \n " ) ) ;
return - 1 ;
}
2014-08-05 10:43:11 +04:00
return ctdb_db_prio_iterator ( ctdb , priority , db_lock_unmark_handler ,
NULL ) ;
2012-10-11 04:29:29 +04:00
}
static int ctdb_lockall_unmark ( struct ctdb_context * ctdb )
{
uint32_t priority ;
2013-11-11 05:39:27 +04:00
for ( priority = NUM_DB_PRIORITIES ; priority > 0 ; priority - - ) {
2014-08-05 10:43:11 +04:00
int ret ;
ret = ctdb_db_prio_iterator ( ctdb , priority ,
db_lock_unmark_handler , NULL ) ;
if ( ret ! = 0 ) {
2012-10-11 04:29:29 +04:00
return - 1 ;
}
}
return 0 ;
}
2012-07-09 11:37:35 +04:00
static void ctdb_lock_schedule ( struct ctdb_context * ctdb ) ;
2012-10-11 04:29:29 +04:00
/*
* Destructor to kill the child locking process
*/
static int ctdb_lock_context_destructor ( struct lock_context * lock_ctx )
{
2014-08-11 11:08:20 +04:00
if ( lock_ctx - > request ) {
lock_ctx - > request - > lctx = NULL ;
}
2012-10-11 04:29:29 +04:00
if ( lock_ctx - > child > 0 ) {
ctdb_kill ( lock_ctx - > ctdb , lock_ctx - > child , SIGKILL ) ;
2014-07-24 09:56:41 +04:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_current , lock_ctx ) ;
} else {
DLIST_REMOVE ( lock_ctx - > ctdb - > lock_current , lock_ctx ) ;
}
2013-11-15 08:58:59 +04:00
if ( lock_ctx - > ctdb_db ) {
lock_ctx - > ctdb_db - > lock_num_current - - ;
}
2012-10-11 04:29:29 +04:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_current ) ;
2014-07-15 08:49:44 +04:00
if ( lock_ctx - > ctdb_db ) {
2012-10-11 04:29:29 +04:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_current ) ;
}
} else {
2014-07-24 09:56:41 +04:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_pending , lock_ctx ) ;
} else {
DLIST_REMOVE ( lock_ctx - > ctdb - > lock_pending , lock_ctx ) ;
}
2012-10-11 04:29:29 +04:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_pending ) ;
2014-07-15 08:49:44 +04:00
if ( lock_ctx - > ctdb_db ) {
2012-10-11 04:29:29 +04:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_pending ) ;
}
}
2012-07-09 11:37:35 +04:00
ctdb_lock_schedule ( lock_ctx - > ctdb ) ;
2012-10-11 04:29:29 +04:00
return 0 ;
}
/*
* Destructor to remove lock request
*/
static int ctdb_lock_request_destructor ( struct lock_request * lock_request )
{
2015-06-01 17:15:11 +03:00
if ( lock_request - > lctx = = NULL ) {
return 0 ;
}
lock_request - > lctx - > request = NULL ;
2014-08-11 11:08:20 +04:00
TALLOC_FREE ( lock_request - > lctx ) ;
2015-06-01 17:15:11 +03:00
2012-10-11 04:29:29 +04:00
return 0 ;
}
/*
* Process all the callbacks waiting for lock
*
* If lock has failed , callback is executed with locked = false
*/
static void process_callbacks ( struct lock_context * lock_ctx , bool locked )
{
2014-05-30 09:36:03 +04:00
struct lock_request * request ;
2015-06-02 13:39:17 +03:00
bool auto_mark = lock_ctx - > auto_mark ;
2012-10-11 04:29:29 +04:00
2015-06-02 13:39:17 +03:00
if ( auto_mark & & locked ) {
2012-10-11 04:29:29 +04:00
switch ( lock_ctx - > type ) {
case LOCK_RECORD :
tdb_chainlock_mark ( lock_ctx - > ctdb_db - > ltdb - > tdb , lock_ctx - > key ) ;
break ;
case LOCK_DB :
2015-09-10 06:24:43 +03:00
ctdb_lockdb_mark ( lock_ctx - > ctdb_db ) ;
2012-10-11 04:29:29 +04:00
break ;
case LOCK_ALLDB_PRIO :
ctdb_lockall_mark_prio ( lock_ctx - > ctdb , lock_ctx - > priority ) ;
break ;
case LOCK_ALLDB :
ctdb_lockall_mark ( lock_ctx - > ctdb ) ;
break ;
}
}
2014-05-30 09:36:03 +04:00
request = lock_ctx - > request ;
2015-06-02 13:39:17 +03:00
if ( auto_mark ) {
2015-06-02 04:25:44 +03:00
/* Since request may be freed in the callback, unset the lock
* context , so request destructor will not free lock context .
*/
request - > lctx = NULL ;
2012-10-11 04:29:29 +04:00
}
2015-05-26 17:45:34 +03:00
/* Since request may be freed in the callback, unset the request */
lock_ctx - > request = NULL ;
2014-05-30 09:36:03 +04:00
request - > callback ( request - > private_data , locked ) ;
2012-10-11 04:29:29 +04:00
2015-06-02 13:39:17 +03:00
if ( ! auto_mark ) {
return ;
}
if ( locked ) {
2012-10-11 04:29:29 +04:00
switch ( lock_ctx - > type ) {
case LOCK_RECORD :
tdb_chainlock_unmark ( lock_ctx - > ctdb_db - > ltdb - > tdb , lock_ctx - > key ) ;
break ;
case LOCK_DB :
2015-09-10 06:24:43 +03:00
ctdb_lockdb_unmark ( lock_ctx - > ctdb_db ) ;
2012-10-11 04:29:29 +04:00
break ;
case LOCK_ALLDB_PRIO :
ctdb_lockall_unmark_prio ( lock_ctx - > ctdb , lock_ctx - > priority ) ;
break ;
case LOCK_ALLDB :
ctdb_lockall_unmark ( lock_ctx - > ctdb ) ;
break ;
}
}
2015-06-02 13:43:17 +03:00
talloc_free ( lock_ctx ) ;
2012-10-11 04:29:29 +04:00
}
/*
 * Map a lock latency (in seconds) to a histogram bucket.
 *
 * Bucket i collects the values below the i-th upper bound:
 *   1ms, 10ms, 100ms, 1s, 2s, 4s, 8s, 16s, 32s, 64s
 * and bucket 10 collects everything else.
 */
static int lock_bucket_id(double t)
{
	const double ms = 1.e-3, s = 1;
	const double upper_bound[] = {
		1 * ms, 10 * ms, 100 * ms,
		1 * s, 2 * s, 4 * s, 8 * s, 16 * s, 32 * s, 64 * s,
	};
	const int num_bounds =
		(int)(sizeof(upper_bound) / sizeof(upper_bound[0]));
	int id;

	/* The first bound that t stays below decides the bucket */
	for (id = 0; id < num_bounds; id++) {
		if (t < upper_bound[id]) {
			break;
		}
	}

	return id;
}
/*
* Callback routine when the required locks are obtained .
* Called from parent context
*/
static void ctdb_lock_handler ( struct tevent_context * ev ,
struct tevent_fd * tfd ,
uint16_t flags ,
void * private_data )
{
struct lock_context * lock_ctx ;
char c ;
bool locked ;
double t ;
int id ;
lock_ctx = talloc_get_type_abort ( private_data , struct lock_context ) ;
/* cancel the timeout event */
2014-08-04 16:41:06 +04:00
TALLOC_FREE ( lock_ctx - > ttimer ) ;
2012-10-11 04:29:29 +04:00
t = timeval_elapsed ( & lock_ctx - > start_time ) ;
id = lock_bucket_id ( t ) ;
/* Read the status from the child process */
2014-07-30 15:03:53 +04:00
if ( sys_read ( lock_ctx - > fd [ 0 ] , & c , 1 ) ! = 1 ) {
2013-11-11 05:39:27 +04:00
locked = false ;
} else {
locked = ( c = = 0 ? true : false ) ;
}
2012-10-11 04:29:29 +04:00
/* Update statistics */
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_calls ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_calls ) ;
}
if ( locked ) {
if ( lock_ctx - > ctdb_db ) {
2013-07-03 05:46:53 +04:00
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . buckets [ id ] ) ;
CTDB_UPDATE_LATENCY ( lock_ctx - > ctdb , lock_ctx - > ctdb_db ,
lock_type_str [ lock_ctx - > type ] , locks . latency ,
lock_ctx - > start_time ) ;
2012-11-14 08:51:59 +04:00
CTDB_UPDATE_DB_LATENCY ( lock_ctx - > ctdb_db , lock_type_str [ lock_ctx - > type ] , locks . latency , t ) ;
2012-10-11 04:29:29 +04:00
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . buckets [ id ] ) ;
}
} else {
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_failed ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_failed ) ;
}
}
process_callbacks ( lock_ctx , locked ) ;
}
/*
* Callback routine when required locks are not obtained within timeout
* Called from parent context
*/
static void ctdb_lock_timeout_handler ( struct tevent_context * ev ,
struct tevent_timer * ttimer ,
struct timeval current_time ,
void * private_data )
{
2015-03-06 06:05:23 +03:00
static char debug_locks [ PATH_MAX + 1 ] = " " ;
2012-10-11 04:29:29 +04:00
struct lock_context * lock_ctx ;
struct ctdb_context * ctdb ;
2013-07-08 09:46:53 +04:00
pid_t pid ;
2015-03-04 07:36:05 +03:00
double elapsed_time ;
int new_timer ;
2012-10-11 04:29:29 +04:00
lock_ctx = talloc_get_type_abort ( private_data , struct lock_context ) ;
ctdb = lock_ctx - > ctdb ;
2014-08-11 10:43:07 +04:00
/* If a node stopped/banned, don't spam the logs */
if ( ctdb - > nodes [ ctdb - > pnn ] - > flags & NODE_FLAGS_INACTIVE ) {
2014-09-25 06:44:59 +04:00
lock_ctx - > ttimer = NULL ;
2014-08-11 10:43:07 +04:00
return ;
}
2015-03-04 07:36:05 +03:00
elapsed_time = timeval_elapsed ( & lock_ctx - > start_time ) ;
2014-07-15 08:49:44 +04:00
if ( lock_ctx - > ctdb_db ) {
2013-07-08 09:46:53 +04:00
DEBUG ( DEBUG_WARNING ,
( " Unable to get %s lock on database %s for %.0lf seconds \n " ,
( lock_ctx - > type = = LOCK_RECORD ? " RECORD " : " DB " ) ,
2015-03-04 07:36:05 +03:00
lock_ctx - > ctdb_db - > db_name , elapsed_time ) ) ;
2013-07-08 09:46:53 +04:00
} else {
DEBUG ( DEBUG_WARNING ,
( " Unable to get ALLDB locks for %.0lf seconds \n " ,
2015-03-04 07:36:05 +03:00
elapsed_time ) ) ;
2013-07-08 09:46:53 +04:00
}
2015-03-06 06:05:23 +03:00
if ( ctdb_set_helper ( " lock debugging helper " ,
debug_locks , sizeof ( debug_locks ) ,
" CTDB_DEBUG_LOCKS " ,
getenv ( " CTDB_BASE " ) , " debug_locks.sh " ) ) {
2013-11-19 08:31:39 +04:00
pid = vfork ( ) ;
2013-07-08 09:46:53 +04:00
if ( pid = = 0 ) {
2013-10-01 09:13:29 +04:00
execl ( debug_locks , debug_locks , NULL ) ;
2013-11-19 08:31:39 +04:00
_exit ( 0 ) ;
2013-07-08 09:46:53 +04:00
}
2013-11-19 08:31:39 +04:00
ctdb_track_child ( ctdb , pid ) ;
2013-10-01 09:13:29 +04:00
} else {
DEBUG ( DEBUG_WARNING ,
( __location__
2015-03-06 06:05:23 +03:00
" Unable to setup lock debugging \n " ) ) ;
2012-10-11 04:29:29 +04:00
}
2015-03-04 07:36:05 +03:00
/* Back-off logging if lock is not obtained for a long time */
if ( elapsed_time < 100.0 ) {
new_timer = 10 ;
} else if ( elapsed_time < 1000.0 ) {
new_timer = 100 ;
} else {
new_timer = 1000 ;
}
2012-10-11 04:29:29 +04:00
/* reset the timeout timer */
// talloc_free(lock_ctx->ttimer);
lock_ctx - > ttimer = tevent_add_timer ( ctdb - > ev ,
lock_ctx ,
2015-03-04 07:36:05 +03:00
timeval_current_ofs ( new_timer , 0 ) ,
2012-10-11 04:29:29 +04:00
ctdb_lock_timeout_handler ,
( void * ) lock_ctx ) ;
}
2014-08-05 10:37:43 +04:00
/*
 * Iterator handler used to size the helper's argument list: each database
 * adds two entries (see db_name_handler) to the int that private_data
 * points to.  Always returns 0.
 */
static int db_count_handler(struct ctdb_db_context *ctdb_db, void *private_data)
{
	int *num_args = private_data;

	*num_args = *num_args + 2;

	return 0;
}
2014-12-11 05:16:47 +03:00
/*
 * TDB open flags that are passed to the lock helper for a database.
 *
 * TDB_DEFAULT, except that when the build knows TDB_MUTEX_LOCKING, a
 * non-persistent database with the mutex_enabled tunable set gets
 * TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST.
 */
static int db_flags(struct ctdb_db_context *ctdb_db)
{
#ifdef TDB_MUTEX_LOCKING
	if (!ctdb_db->persistent && ctdb_db->ctdb->tunable.mutex_enabled) {
		return (TDB_MUTEX_LOCKING | TDB_CLEAR_IF_FIRST);
	}
#endif

	return TDB_DEFAULT;
}
2013-04-30 08:32:46 +04:00
/* Cursor used by db_name_handler to fill in the helper's argument list */
struct db_namelist {
	/* Argument array being filled; also the talloc parent of the strings */
	const char **names;

	/* Index of the next free slot in names[] */
	int n;
};
2014-08-05 10:37:43 +04:00
static int db_name_handler ( struct ctdb_db_context * ctdb_db , void * private_data )
2013-04-30 08:32:46 +04:00
{
struct db_namelist * list = ( struct db_namelist * ) private_data ;
list - > names [ list - > n ] = talloc_strdup ( list - > names , ctdb_db - > db_path ) ;
2014-12-11 05:16:47 +03:00
list - > names [ list - > n + 1 ] = talloc_asprintf ( list - > names , " 0x%x " ,
db_flags ( ctdb_db ) ) ;
list - > n + = 2 ;
2013-04-30 08:32:46 +04:00
return 0 ;
}
2014-08-13 08:46:31 +04:00
/*
 * Build the argument vector for the lock helper.
 *
 * Layout of the vector:
 *   [0]  pid of this process
 *   [1]  number of the fd the helper reports on
 *   [2]  " RECORD " or " DB "
 *   then, for a record lock:     db path, db flags, key in hex or " NULL "
 *         for a database lock:   db path, db flags
 *         for all-database locks: db path, db flags for every database
 *   last a terminating NULL entry
 *
 * On success *argv is the vector (allocated on mem_ctx) and *argc is the
 * number of entries including the terminating NULL; true is returned.
 * On allocation failure or an unknown lock type false is returned and
 * *argc / *argv are left untouched.
 */
static bool lock_helper_args(TALLOC_CTX *mem_ctx,
			     struct lock_context *lock_ctx, int fd,
			     int *argc, const char ***argv)
{
	struct ctdb_context *ctdb = lock_ctx->ctdb;
	const char **args = NULL;
	int nargs, i;
	uint32_t priority;
	struct db_namelist list;

	/* First work out how many arguments are needed */
	switch (lock_ctx->type) {
	case LOCK_RECORD:
		nargs = 6;
		break;

	case LOCK_DB:
		nargs = 5;
		break;

	case LOCK_ALLDB_PRIO:
		nargs = 3;
		ctdb_db_prio_iterator(ctdb, lock_ctx->priority,
				      db_count_handler, &nargs);
		break;

	case LOCK_ALLDB:
		nargs = 3;
		/*
		 * Priorities run from 1 to NUM_DB_PRIORITIES inclusive,
		 * the same range that ctdb_lockall_mark() walks.  Leaving
		 * out the last priority would mark databases as locked
		 * that the helper never locked.
		 */
		for (priority = 1; priority <= NUM_DB_PRIORITIES; priority++) {
			ctdb_db_prio_iterator(ctdb, priority,
					      db_count_handler, &nargs);
		}
		break;

	default:
		/* Unknown lock type: nargs would be used uninitialised */
		return false;
	}

	/* Add extra argument for null termination */
	nargs++;

	args = talloc_array(mem_ctx, const char *, nargs);
	if (args == NULL) {
		return false;
	}

	args[0] = talloc_asprintf(args, " %d ", getpid());
	args[1] = talloc_asprintf(args, " %d ", fd);

	switch (lock_ctx->type) {
	case LOCK_RECORD:
		args[2] = talloc_strdup(args, " RECORD ");
		args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
		args[4] = talloc_asprintf(args, " 0x%x ",
					  db_flags(lock_ctx->ctdb_db));
		if (lock_ctx->key.dsize == 0) {
			args[5] = talloc_strdup(args, " NULL ");
		} else {
			args[5] = hex_encode_talloc(args, lock_ctx->key.dptr, lock_ctx->key.dsize);
		}
		break;

	case LOCK_DB:
		args[2] = talloc_strdup(args, " DB ");
		args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
		args[4] = talloc_asprintf(args, " 0x%x ",
					  db_flags(lock_ctx->ctdb_db));
		break;

	case LOCK_ALLDB_PRIO:
		args[2] = talloc_strdup(args, " DB ");
		list.names = args;
		list.n = 3;
		ctdb_db_prio_iterator(ctdb, lock_ctx->priority,
				      db_name_handler, &list);
		break;

	case LOCK_ALLDB:
		args[2] = talloc_strdup(args, " DB ");
		list.names = args;
		list.n = 3;
		/* Must visit exactly the databases counted above */
		for (priority = 1; priority <= NUM_DB_PRIORITIES; priority++) {
			ctdb_db_prio_iterator(ctdb, priority,
					      db_name_handler, &list);
		}
		break;
	}

	/* Make sure last argument is NULL */
	args[nargs-1] = NULL;

	/* Any other NULL entry means one of the allocations failed */
	for (i = 0; i < nargs-1; i++) {
		if (args[i] == NULL) {
			talloc_free(args);
			return false;
		}
	}

	*argc = nargs;
	*argv = args;
	return true;
}
2014-07-24 09:56:41 +04:00
/*
* Find a lock request that can be scheduled
*/
2014-09-04 07:28:34 +04:00
static struct lock_context * ctdb_find_lock_context ( struct ctdb_context * ctdb )
2014-07-24 09:56:41 +04:00
{
struct lock_context * lock_ctx , * next_ctx ;
struct ctdb_db_context * ctdb_db ;
/* First check if there are database lock requests */
2014-08-04 17:57:12 +04:00
for ( lock_ctx = ctdb - > lock_pending ; lock_ctx ! = NULL ;
lock_ctx = next_ctx ) {
if ( lock_ctx - > request ! = NULL ) {
/* Found a lock context with a request */
return lock_ctx ;
2014-07-24 09:56:41 +04:00
}
2014-08-04 17:57:12 +04:00
next_ctx = lock_ctx - > next ;
DEBUG ( DEBUG_INFO , ( " Removing lock context without lock "
" request \n " ) ) ;
DLIST_REMOVE ( ctdb - > lock_pending , lock_ctx ) ;
CTDB_DECREMENT_STAT ( ctdb , locks . num_pending ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db ,
locks . num_pending ) ;
}
talloc_free ( lock_ctx ) ;
2014-07-24 09:56:41 +04:00
}
/* Next check database queues */
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
2014-08-04 17:57:12 +04:00
if ( ctdb_db - > lock_num_current = =
ctdb - > tunable . lock_processes_per_db ) {
2014-07-24 09:56:41 +04:00
continue ;
}
2014-08-04 17:57:12 +04:00
for ( lock_ctx = ctdb_db - > lock_pending ; lock_ctx ! = NULL ;
lock_ctx = next_ctx ) {
2014-07-24 09:56:41 +04:00
next_ctx = lock_ctx - > next ;
2014-08-04 17:57:12 +04:00
if ( lock_ctx - > request ! = NULL ) {
return lock_ctx ;
}
2014-07-24 09:56:41 +04:00
2014-08-04 17:57:12 +04:00
DEBUG ( DEBUG_INFO , ( " Removing lock context without "
" lock request \n " ) ) ;
DLIST_REMOVE ( ctdb_db - > lock_pending , lock_ctx ) ;
CTDB_DECREMENT_STAT ( ctdb , locks . num_pending ) ;
CTDB_DECREMENT_DB_STAT ( ctdb_db , locks . num_pending ) ;
talloc_free ( lock_ctx ) ;
2014-07-24 09:56:41 +04:00
}
}
return NULL ;
}
2013-04-30 08:32:46 +04:00
2012-10-11 04:29:29 +04:00
/*
* Schedule a new lock child process
* Set up callback handler and timeout handler
*/
static void ctdb_lock_schedule ( struct ctdb_context * ctdb )
{
2014-07-24 09:56:41 +04:00
struct lock_context * lock_ctx ;
2014-08-13 08:46:31 +04:00
int ret , argc ;
2013-04-30 09:07:49 +04:00
TALLOC_CTX * tmp_ctx ;
2015-03-06 06:05:23 +03:00
static char prog [ PATH_MAX + 1 ] = " " ;
2014-08-13 08:46:31 +04:00
const char * * args ;
2012-10-11 04:29:29 +04:00
2015-03-06 06:05:23 +03:00
if ( ! ctdb_set_helper ( " lock helper " ,
prog , sizeof ( prog ) ,
" CTDB_LOCK_HELPER " ,
CTDB_HELPER_BINDIR , " ctdb_lock_helper " ) ) {
ctdb_die ( ctdb , __location__
" Unable to set lock helper \n " ) ;
2013-05-08 07:45:55 +04:00
}
2012-10-11 04:29:29 +04:00
/* Find a lock context with requests */
2014-07-24 09:56:41 +04:00
lock_ctx = ctdb_find_lock_context ( ctdb ) ;
2012-10-11 04:29:29 +04:00
if ( lock_ctx = = NULL ) {
return ;
}
lock_ctx - > child = - 1 ;
ret = pipe ( lock_ctx - > fd ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to create pipe in ctdb_lock_schedule \n " ) ) ;
return ;
}
2013-04-30 09:07:49 +04:00
set_close_on_exec ( lock_ctx - > fd [ 0 ] ) ;
/* Create data for child process */
tmp_ctx = talloc_new ( lock_ctx ) ;
if ( tmp_ctx = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to allocate memory for helper args \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
return ;
}
/* Create arguments for lock helper */
2014-08-13 08:46:31 +04:00
if ( ! lock_helper_args ( tmp_ctx , lock_ctx , lock_ctx - > fd [ 1 ] ,
& argc , & args ) ) {
2013-04-30 09:07:49 +04:00
DEBUG ( DEBUG_ERR , ( " Failed to create lock helper args \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
talloc_free ( tmp_ctx ) ;
return ;
}
2014-08-13 09:01:54 +04:00
if ( ! ctdb_vfork_with_logging ( lock_ctx , ctdb , " lock_helper " ,
prog , argc , ( const char * * ) args ,
NULL , NULL , & lock_ctx - > child ) ) {
2012-10-11 04:29:29 +04:00
DEBUG ( DEBUG_ERR , ( " Failed to create a child in ctdb_lock_schedule \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
2013-04-30 09:07:49 +04:00
talloc_free ( tmp_ctx ) ;
2012-10-11 04:29:29 +04:00
return ;
}
/* Parent process */
close ( lock_ctx - > fd [ 1 ] ) ;
2013-04-30 09:07:49 +04:00
talloc_free ( tmp_ctx ) ;
2012-10-11 04:29:29 +04:00
/* Set up timeout handler */
lock_ctx - > ttimer = tevent_add_timer ( ctdb - > ev ,
lock_ctx ,
timeval_current_ofs ( 10 , 0 ) ,
ctdb_lock_timeout_handler ,
( void * ) lock_ctx ) ;
if ( lock_ctx - > ttimer = = NULL ) {
ctdb_kill ( ctdb , lock_ctx - > child , SIGKILL ) ;
lock_ctx - > child = - 1 ;
close ( lock_ctx - > fd [ 0 ] ) ;
return ;
}
/* Set up callback */
lock_ctx - > tfd = tevent_add_fd ( ctdb - > ev ,
lock_ctx ,
lock_ctx - > fd [ 0 ] ,
EVENT_FD_READ ,
ctdb_lock_handler ,
( void * ) lock_ctx ) ;
if ( lock_ctx - > tfd = = NULL ) {
TALLOC_FREE ( lock_ctx - > ttimer ) ;
ctdb_kill ( ctdb , lock_ctx - > child , SIGKILL ) ;
lock_ctx - > child = - 1 ;
close ( lock_ctx - > fd [ 0 ] ) ;
return ;
}
tevent_fd_set_auto_close ( lock_ctx - > tfd ) ;
/* Move the context from pending to current */
2014-07-24 09:56:41 +04:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_pending , lock_ctx ) ;
DLIST_ADD_END ( lock_ctx - > ctdb_db - > lock_current , lock_ctx , NULL ) ;
} else {
DLIST_REMOVE ( ctdb - > lock_pending , lock_ctx ) ;
DLIST_ADD_END ( ctdb - > lock_current , lock_ctx , NULL ) ;
}
2014-07-15 08:38:52 +04:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_pending ) ;
2014-07-15 08:38:12 +04:00
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_current ) ;
2013-11-15 08:58:59 +04:00
if ( lock_ctx - > ctdb_db ) {
lock_ctx - > ctdb_db - > lock_num_current + + ;
2014-07-15 08:38:52 +04:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_pending ) ;
2013-11-15 11:36:09 +04:00
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_current ) ;
2013-11-15 08:58:59 +04:00
}
2012-10-11 04:29:29 +04:00
}
/*
* Lock record / db depending on type
*/
2014-08-11 11:08:20 +04:00
static struct lock_request * ctdb_lock_internal ( TALLOC_CTX * mem_ctx ,
struct ctdb_context * ctdb ,
2012-10-11 04:29:29 +04:00
struct ctdb_db_context * ctdb_db ,
TDB_DATA key ,
uint32_t priority ,
void ( * callback ) ( void * , bool ) ,
void * private_data ,
enum lock_type type ,
bool auto_mark )
{
2013-11-18 08:48:22 +04:00
struct lock_context * lock_ctx = NULL ;
2012-10-11 04:29:29 +04:00
struct lock_request * request ;
if ( callback = = NULL ) {
2013-05-01 06:55:22 +04:00
DEBUG ( DEBUG_WARNING , ( " No callback function specified, not locking \n " ) ) ;
2012-10-11 04:29:29 +04:00
return NULL ;
}
2014-05-30 09:36:28 +04:00
lock_ctx = talloc_zero ( ctdb , struct lock_context ) ;
2012-10-11 04:29:29 +04:00
if ( lock_ctx = = NULL ) {
2014-05-30 09:36:28 +04:00
DEBUG ( DEBUG_ERR , ( " Failed to create a new lock context \n " ) ) ;
return NULL ;
}
2012-10-11 04:29:29 +04:00
2014-08-11 11:08:20 +04:00
if ( ( request = talloc_zero ( mem_ctx , struct lock_request ) ) = = NULL ) {
2014-05-30 09:49:46 +04:00
talloc_free ( lock_ctx ) ;
return NULL ;
}
2014-05-30 09:36:28 +04:00
lock_ctx - > type = type ;
lock_ctx - > ctdb = ctdb ;
lock_ctx - > ctdb_db = ctdb_db ;
lock_ctx - > key . dsize = key . dsize ;
if ( key . dsize > 0 ) {
lock_ctx - > key . dptr = talloc_memdup ( lock_ctx , key . dptr , key . dsize ) ;
if ( lock_ctx - > key . dptr = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Memory allocation error \n " ) ) ;
talloc_free ( lock_ctx ) ;
2015-06-02 04:15:11 +03:00
talloc_free ( request ) ;
2014-05-30 09:36:28 +04:00
return NULL ;
2012-10-11 04:29:29 +04:00
}
2014-05-30 09:36:28 +04:00
lock_ctx - > key_hash = ctdb_hash ( & key ) ;
} else {
lock_ctx - > key . dptr = NULL ;
}
lock_ctx - > priority = priority ;
lock_ctx - > auto_mark = auto_mark ;
2012-10-11 04:29:29 +04:00
2014-05-30 09:49:46 +04:00
lock_ctx - > request = request ;
2014-05-30 09:36:28 +04:00
lock_ctx - > child = - 1 ;
2012-10-11 04:29:29 +04:00
2014-07-15 08:13:25 +04:00
/* Non-record locks are required by recovery and should be scheduled
* immediately , so keep them at the head of the pending queue .
*/
if ( lock_ctx - > type = = LOCK_RECORD ) {
2014-07-24 09:56:41 +04:00
DLIST_ADD_END ( ctdb_db - > lock_pending , lock_ctx , NULL ) ;
2014-07-15 08:13:25 +04:00
} else {
2014-07-24 09:56:41 +04:00
DLIST_ADD_END ( ctdb - > lock_pending , lock_ctx , NULL ) ;
2014-07-15 08:13:25 +04:00
}
2014-05-30 09:36:28 +04:00
CTDB_INCREMENT_STAT ( ctdb , locks . num_pending ) ;
if ( ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( ctdb_db , locks . num_pending ) ;
2012-10-11 04:29:29 +04:00
}
2014-05-30 09:36:28 +04:00
/* Start the timer when we activate the context */
lock_ctx - > start_time = timeval_current ( ) ;
2012-10-11 04:29:29 +04:00
request - > lctx = lock_ctx ;
request - > callback = callback ;
request - > private_data = private_data ;
talloc_set_destructor ( request , ctdb_lock_request_destructor ) ;
2015-06-01 17:22:07 +03:00
talloc_set_destructor ( lock_ctx , ctdb_lock_context_destructor ) ;
2012-10-11 04:29:29 +04:00
ctdb_lock_schedule ( ctdb ) ;
return request ;
}
/*
* obtain a lock on a record in a database
*/
2014-08-11 11:08:20 +04:00
struct lock_request * ctdb_lock_record ( TALLOC_CTX * mem_ctx ,
struct ctdb_db_context * ctdb_db ,
2012-10-11 04:29:29 +04:00
TDB_DATA key ,
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2014-08-11 11:08:20 +04:00
return ctdb_lock_internal ( mem_ctx ,
ctdb_db - > ctdb ,
2012-10-11 04:29:29 +04:00
ctdb_db ,
key ,
0 ,
callback ,
private_data ,
LOCK_RECORD ,
auto_mark ) ;
}
/*
* obtain a lock on a database
*/
2014-08-11 11:08:20 +04:00
struct lock_request * ctdb_lock_db ( TALLOC_CTX * mem_ctx ,
struct ctdb_db_context * ctdb_db ,
2012-10-11 04:29:29 +04:00
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2014-08-11 11:08:20 +04:00
return ctdb_lock_internal ( mem_ctx ,
ctdb_db - > ctdb ,
2012-10-11 04:29:29 +04:00
ctdb_db ,
tdb_null ,
0 ,
callback ,
private_data ,
LOCK_DB ,
auto_mark ) ;
}
/*
* obtain locks on all databases of specified priority
*/
2014-08-11 11:08:20 +04:00
struct lock_request * ctdb_lock_alldb_prio ( TALLOC_CTX * mem_ctx ,
struct ctdb_context * ctdb ,
2012-10-11 04:29:29 +04:00
uint32_t priority ,
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2013-11-11 05:39:27 +04:00
if ( priority < 1 | | priority > NUM_DB_PRIORITIES ) {
2012-10-11 04:29:29 +04:00
DEBUG ( DEBUG_ERR , ( " Invalid db priority: %u \n " , priority ) ) ;
return NULL ;
}
2014-08-11 11:08:20 +04:00
return ctdb_lock_internal ( mem_ctx ,
ctdb ,
2012-10-11 04:29:29 +04:00
NULL ,
tdb_null ,
priority ,
callback ,
private_data ,
LOCK_ALLDB_PRIO ,
auto_mark ) ;
}
/*
* obtain locks on all databases
*/
2014-08-11 11:08:20 +04:00
struct lock_request * ctdb_lock_alldb ( TALLOC_CTX * mem_ctx ,
struct ctdb_context * ctdb ,
2012-10-11 04:29:29 +04:00
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2014-08-11 11:08:20 +04:00
return ctdb_lock_internal ( mem_ctx ,
ctdb ,
2012-10-11 04:29:29 +04:00
NULL ,
tdb_null ,
0 ,
callback ,
private_data ,
LOCK_ALLDB ,
auto_mark ) ;
}