2012-10-11 11:29:29 +11:00
/*
ctdb lock handling
provide API to do non - blocking locks for single or all databases
Copyright ( C ) Amitay Isaacs 2012
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation ; either version 3 of the License , or
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with this program ; if not , see < http : //www.gnu.org/licenses/>.
*/
2015-10-26 16:50:46 +11:00
# include "replace.h"
2012-10-11 11:29:29 +11:00
# include "system/filesys.h"
2015-10-26 16:50:46 +11:00
# include "system/network.h"
# include <talloc.h>
# include <tevent.h>
# include "lib/tdb_wrap/tdb_wrap.h"
2012-10-11 11:29:29 +11:00
# include "lib/util/dlinklist.h"
2015-10-26 16:50:46 +11:00
# include "lib/util/debug.h"
# include "lib/util/samba_util.h"
2016-11-29 12:55:06 +11:00
# include "lib/util/sys_rw.h"
2015-10-26 16:50:46 +11:00
# include "ctdb_private.h"
2015-10-23 14:17:34 +11:00
# include "common/common.h"
2015-11-11 15:41:10 +11:00
# include "common/logging.h"
2012-10-11 11:29:29 +11:00
/*
* Non - blocking Locking API
*
* 1. Create a child process to do blocking locks .
* 2. Once the locks are obtained , signal parent process via fd .
* 3. Invoke registered callback routine with locking status .
* 4. If the child process cannot get locks within certain time ,
2014-07-23 12:52:03 +10:00
* execute an external script to debug .
2012-10-11 11:29:29 +11:00
*
* ctdb_lock_record ( ) - get a lock on a record
* ctdb_lock_db ( ) - get a lock on a DB
*
* auto_mark - whether to mark / unmark DBs in before / after callback
2015-06-02 13:15:37 +10:00
* = false is used for freezing databases for
* recovery since the recovery cannot start till
* databases are locked on all the nodes .
* = true is used for record locks .
2012-10-11 11:29:29 +11:00
*/
/*
 * Kind of lock being requested.
 *
 * The numeric values are used to index lock_type_str[].
 */
enum lock_type {
	LOCK_RECORD = 0,	/* lock on a single record */
	LOCK_DB = 1,		/* lock on a whole database */
};
2012-11-14 15:51:59 +11:00
/*
 * Operation names handed to the latency statistics macros,
 * indexed by enum lock_type.
 */
static const char * const lock_type_str[] = {
	[0] = " lock_record ",	/* LOCK_RECORD */
	[1] = " lock_db ",	/* LOCK_DB */
};
2012-10-11 11:29:29 +11:00
struct lock_request ;
/*
 * lock_context is the common part for a lock request
 *
 * A context sits on a pending list until a lock helper is started for it,
 * then on a current list until it is freed.  Record locks use the lists in
 * the database context, database locks use the lists in the ctdb context.
 */
struct lock_context {
	/* List linkage, maintained through the DLIST_* macros */
	struct lock_context *next;
	struct lock_context *prev;

	enum lock_type type;
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;

	/* Private copy of the record key; dptr is NULL when dsize is 0 */
	TDB_DATA key;

	/* Stored from the caller; not read anywhere in this part of the file */
	uint32_t priority;

	/* Mark/unmark the lock in this process around the callback */
	bool auto_mark;

	/* Client part; NULL once the callback ran or the request was freed */
	struct lock_request *request;

	/* PID of the lock helper, -1 while no helper is running */
	pid_t child;

	/* Pipe to the helper: fd[0] is read here, fd[1] is given to the helper */
	int fd[2];

	/* Read event on fd[0] and the "lock is taking too long" timer */
	struct tevent_fd *tfd;
	struct tevent_timer *ttimer;

	/* Time the request was created; used for latency statistics */
	struct timeval start_time;

	/* ctdb_hash() of the key, only set when the key is non-empty */
	uint32_t key_hash;

	/* Not referenced in this part of the file */
	bool can_schedule;
};
/*
 * lock_request is the client specific part for a lock request
 *
 * This is the handle returned to the caller; freeing it also frees the
 * lock context it still points to (see ctdb_lock_request_destructor).
 */
struct lock_request {
	/* Owning context, or NULL once the two have been detached */
	struct lock_context *lctx;

	/* Invoked with private_data and the lock result (true == locked) */
	void (*callback)(void *, bool);
	void *private_data;
};
2014-08-05 16:49:06 +10:00
/*
 * Run handler on every database in ctdb->db_list
 *
 * Iteration stops at the first handler call that returns non-zero.
 *
 * Returns 0 when every handler call returned 0, -1 otherwise.
 */
int ctdb_db_iterator(struct ctdb_context *ctdb, ctdb_db_handler_t handler,
		     void *private_data)
{
	struct ctdb_db_context *db = ctdb->db_list;

	while (db != NULL) {
		if (handler(db, private_data) != 0) {
			return -1;
		}
		db = db->next;
	}

	return 0;
}
2013-04-30 13:23:59 +10:00
2012-10-11 11:29:29 +11:00
/*
* lock all databases - mark only
*/
2014-08-05 16:37:43 +10:00
static int db_lock_mark_handler ( struct ctdb_db_context * ctdb_db ,
2013-04-30 14:07:11 +10:00
void * private_data )
{
int tdb_transaction_write_lock_mark ( struct tdb_context * ) ;
2014-08-05 16:37:43 +10:00
DEBUG ( DEBUG_INFO , ( " marking locked database %s \n " , ctdb_db - > db_name ) ) ;
2013-04-30 14:07:11 +10:00
if ( tdb_transaction_write_lock_mark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to mark (transaction lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
if ( tdb_lockall_mark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to mark (all lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
return 0 ;
}
2015-09-10 13:24:43 +10:00
/*
 * Mark a frozen database as locked
 *
 * Refuses (returns -1) unless ctdb_db_frozen() reports the database as
 * frozen; otherwise returns the result of db_lock_mark_handler().
 */
int ctdb_lockdb_mark(struct ctdb_db_context *ctdb_db)
{
	if (ctdb_db_frozen(ctdb_db)) {
		return db_lock_mark_handler(ctdb_db, NULL);
	}

	DEBUG(DEBUG_ERR,
	      (" Attempt to mark database locked when not frozen \n "));
	return -1;
}
2012-10-11 11:29:29 +11:00
/*
* lock all databases - unmark only
*/
2014-08-05 16:37:43 +10:00
static int db_lock_unmark_handler ( struct ctdb_db_context * ctdb_db ,
2013-04-30 14:16:07 +10:00
void * private_data )
{
int tdb_transaction_write_lock_unmark ( struct tdb_context * ) ;
2014-08-05 16:37:43 +10:00
DEBUG ( DEBUG_INFO , ( " unmarking locked database %s \n " , ctdb_db - > db_name ) ) ;
2013-04-30 14:16:07 +10:00
if ( tdb_transaction_write_lock_unmark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to unmark (transaction lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
if ( tdb_lockall_unmark ( ctdb_db - > ltdb - > tdb ) ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to unmark (all lock) database %s \n " ,
ctdb_db - > db_name ) ) ;
return - 1 ;
}
return 0 ;
}
2015-09-10 13:24:43 +10:00
/*
 * Remove the lock marks from a frozen database
 *
 * Refuses (returns -1) unless ctdb_db_frozen() reports the database as
 * frozen; otherwise returns the result of db_lock_unmark_handler().
 */
int ctdb_lockdb_unmark(struct ctdb_db_context *ctdb_db)
{
	if (ctdb_db_frozen(ctdb_db)) {
		return db_lock_unmark_handler(ctdb_db, NULL);
	}

	DEBUG(DEBUG_ERR,
	      (" Attempt to unmark database locked when not frozen \n "));
	return -1;
}
2012-07-09 17:37:35 +10:00
static void ctdb_lock_schedule ( struct ctdb_context * ctdb ) ;
2012-10-11 11:29:29 +11:00
/*
* Destructor to kill the child locking process
*/
static int ctdb_lock_context_destructor ( struct lock_context * lock_ctx )
{
2014-08-11 17:08:20 +10:00
if ( lock_ctx - > request ) {
lock_ctx - > request - > lctx = NULL ;
}
2012-10-11 11:29:29 +11:00
if ( lock_ctx - > child > 0 ) {
2016-11-29 17:20:45 +11:00
ctdb_kill ( lock_ctx - > ctdb , lock_ctx - > child , SIGTERM ) ;
2014-07-24 15:56:41 +10:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_current , lock_ctx ) ;
} else {
DLIST_REMOVE ( lock_ctx - > ctdb - > lock_current , lock_ctx ) ;
}
2013-11-15 15:58:59 +11:00
if ( lock_ctx - > ctdb_db ) {
lock_ctx - > ctdb_db - > lock_num_current - - ;
}
2012-10-11 11:29:29 +11:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_current ) ;
2014-07-15 14:49:44 +10:00
if ( lock_ctx - > ctdb_db ) {
2012-10-11 11:29:29 +11:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_current ) ;
}
} else {
2014-07-24 15:56:41 +10:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_pending , lock_ctx ) ;
} else {
DLIST_REMOVE ( lock_ctx - > ctdb - > lock_pending , lock_ctx ) ;
}
2012-10-11 11:29:29 +11:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_pending ) ;
2014-07-15 14:49:44 +10:00
if ( lock_ctx - > ctdb_db ) {
2012-10-11 11:29:29 +11:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_pending ) ;
}
}
2012-07-09 17:37:35 +10:00
ctdb_lock_schedule ( lock_ctx - > ctdb ) ;
2012-10-11 11:29:29 +11:00
return 0 ;
}
/*
* Destructor to remove lock request
*/
static int ctdb_lock_request_destructor ( struct lock_request * lock_request )
{
2015-06-02 00:15:11 +10:00
if ( lock_request - > lctx = = NULL ) {
return 0 ;
}
lock_request - > lctx - > request = NULL ;
2014-08-11 17:08:20 +10:00
TALLOC_FREE ( lock_request - > lctx ) ;
2015-06-02 00:15:11 +10:00
2012-10-11 11:29:29 +11:00
return 0 ;
}
/*
* Process all the callbacks waiting for lock
*
* If lock has failed , callback is executed with locked = false
*/
static void process_callbacks ( struct lock_context * lock_ctx , bool locked )
{
2014-05-30 15:36:03 +10:00
struct lock_request * request ;
2015-06-02 12:39:17 +02:00
bool auto_mark = lock_ctx - > auto_mark ;
2012-10-11 11:29:29 +11:00
2015-06-02 12:39:17 +02:00
if ( auto_mark & & locked ) {
2012-10-11 11:29:29 +11:00
switch ( lock_ctx - > type ) {
case LOCK_RECORD :
tdb_chainlock_mark ( lock_ctx - > ctdb_db - > ltdb - > tdb , lock_ctx - > key ) ;
break ;
case LOCK_DB :
2015-09-10 13:24:43 +10:00
ctdb_lockdb_mark ( lock_ctx - > ctdb_db ) ;
2012-10-11 11:29:29 +11:00
break ;
}
}
2014-05-30 15:36:03 +10:00
request = lock_ctx - > request ;
2015-06-02 12:39:17 +02:00
if ( auto_mark ) {
2015-06-02 11:25:44 +10:00
/* Since request may be freed in the callback, unset the lock
* context , so request destructor will not free lock context .
*/
request - > lctx = NULL ;
2012-10-11 11:29:29 +11:00
}
2015-05-26 16:45:34 +02:00
/* Since request may be freed in the callback, unset the request */
lock_ctx - > request = NULL ;
2014-05-30 15:36:03 +10:00
request - > callback ( request - > private_data , locked ) ;
2012-10-11 11:29:29 +11:00
2015-06-02 12:39:17 +02:00
if ( ! auto_mark ) {
return ;
}
if ( locked ) {
2012-10-11 11:29:29 +11:00
switch ( lock_ctx - > type ) {
case LOCK_RECORD :
tdb_chainlock_unmark ( lock_ctx - > ctdb_db - > ltdb - > tdb , lock_ctx - > key ) ;
break ;
case LOCK_DB :
2015-09-10 13:24:43 +10:00
ctdb_lockdb_unmark ( lock_ctx - > ctdb_db ) ;
2012-10-11 11:29:29 +11:00
break ;
}
}
2015-06-02 12:43:17 +02:00
talloc_free ( lock_ctx ) ;
2012-10-11 11:29:29 +11:00
}
/*
 * Map a lock latency (in seconds) to a statistics bucket
 *
 * Bucket i holds latencies below the i-th upper bound:
 *   1ms, 10ms, 100ms, 1s, 2s, 4s, 8s, 16s, 32s, 64s
 * Anything that is not below 64s lands in the last bucket (10).
 */
static int lock_bucket_id(double t)
{
	const double msec = 1.e-3;
	const double sec = 1;
	const double upper[] = {
		1 * msec, 10 * msec, 100 * msec,
		1 * sec, 2 * sec, 4 * sec, 8 * sec,
		16 * sec, 32 * sec, 64 * sec,
	};
	const int num_bounds = (int)(sizeof(upper) / sizeof(upper[0]));
	int bucket;

	/* Stop at the first bound the latency is strictly below */
	for (bucket = 0; bucket < num_bounds; bucket++) {
		if (t < upper[bucket]) {
			break;
		}
	}

	return bucket;
}
/*
* Callback routine when the required locks are obtained .
* Called from parent context
*/
static void ctdb_lock_handler ( struct tevent_context * ev ,
struct tevent_fd * tfd ,
uint16_t flags ,
void * private_data )
{
struct lock_context * lock_ctx ;
char c ;
bool locked ;
double t ;
int id ;
lock_ctx = talloc_get_type_abort ( private_data , struct lock_context ) ;
/* cancel the timeout event */
2014-08-04 12:41:06 +00:00
TALLOC_FREE ( lock_ctx - > ttimer ) ;
2012-10-11 11:29:29 +11:00
t = timeval_elapsed ( & lock_ctx - > start_time ) ;
id = lock_bucket_id ( t ) ;
/* Read the status from the child process */
2014-07-30 21:03:53 +10:00
if ( sys_read ( lock_ctx - > fd [ 0 ] , & c , 1 ) ! = 1 ) {
2013-11-11 12:39:27 +11:00
locked = false ;
} else {
locked = ( c = = 0 ? true : false ) ;
}
2012-10-11 11:29:29 +11:00
/* Update statistics */
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_calls ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_calls ) ;
}
if ( locked ) {
if ( lock_ctx - > ctdb_db ) {
2013-07-03 11:46:53 +10:00
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . buckets [ id ] ) ;
CTDB_UPDATE_LATENCY ( lock_ctx - > ctdb , lock_ctx - > ctdb_db ,
lock_type_str [ lock_ctx - > type ] , locks . latency ,
lock_ctx - > start_time ) ;
2012-11-14 15:51:59 +11:00
CTDB_UPDATE_DB_LATENCY ( lock_ctx - > ctdb_db , lock_type_str [ lock_ctx - > type ] , locks . latency , t ) ;
2012-10-11 11:29:29 +11:00
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . buckets [ id ] ) ;
}
} else {
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_failed ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_failed ) ;
}
}
process_callbacks ( lock_ctx , locked ) ;
}
2017-06-07 16:44:24 +10:00
/*
 * One entry of the per-database "already logged" table used to rate-limit
 * record lock timeout messages.  The table stores a pointer to the entry,
 * keyed by the record key.
 */
struct lock_log_entry {
	/* Table this entry is stored in (needed by the cleanup timer) */
	struct db_hash_context *lock_log;

	/* Private copy of the record key */
	TDB_DATA key;

	/* Elapsed time, in whole seconds, that was last logged for the key */
	unsigned long log_sec;

	/* Expiry timer; removes the entry when it fires */
	struct tevent_timer *timer;
};
/*
 * db_hash_fetch() parser for the lock log
 *
 * The stored value is expected to be exactly one pointer to a
 * struct lock_log_entry.  On success the pointer is returned through
 * private_data (a struct lock_log_entry **) and 0 is returned; a value of
 * any other size yields EINVAL.  The key arguments are unused.
 */
static int lock_log_fetch_parser(uint8_t *keybuf, size_t keylen,
				 uint8_t *databuf, size_t datalen,
				 void *private_data)
{
	struct lock_log_entry **result = private_data;
	void *stored;

	if (datalen != sizeof(struct lock_log_entry *)) {
		return EINVAL;
	}

	stored = *(void **)databuf;
	*result = talloc_get_type_abort(stored, struct lock_log_entry);

	return 0;
}
static void lock_log_cleanup ( struct tevent_context * ev ,
struct tevent_timer * ttimer ,
struct timeval current_time ,
void * private_data )
{
struct lock_log_entry * entry = talloc_get_type_abort (
private_data , struct lock_log_entry ) ;
int ret ;
entry - > timer = NULL ;
ret = db_hash_delete ( entry - > lock_log , entry - > key . dptr ,
entry - > key . dsize ) ;
if ( ret ! = 0 ) {
return ;
}
talloc_free ( entry ) ;
}
/*
 * Decide whether a record lock timeout message should be suppressed
 *
 * The lock log remembers, per record key, the elapsed time (in seconds)
 * that was last logged.  An entry expires 30 seconds after it was last
 * refreshed.
 *
 * Returns true only when an entry for the key exists and elapsed_sec is
 * not greater than the value already logged.  In every other case -
 * first sighting, longer wait than last time, malformed entry, or any
 * internal failure - the message is not suppressed and false is returned.
 */
static bool lock_log_skip(struct tevent_context *ev,
			  struct db_hash_context *lock_log,
			  TDB_DATA key, unsigned long elapsed_sec)
{
	struct lock_log_entry *entry = NULL;
	int ret;

	ret = db_hash_fetch(lock_log, key.dptr, key.dsize,
			    lock_log_fetch_parser, &entry);

	if (ret == 0) {
		/* Known key: only log again if the wait has grown */
		if (elapsed_sec <= entry->log_sec) {
			return true;
		}

		entry->log_sec = elapsed_sec;

		/* Restart the expiry period */
		TALLOC_FREE(entry->timer);
		entry->timer = tevent_add_timer(ev, entry,
						timeval_current_ofs(30, 0),
						lock_log_cleanup, entry);
		if (entry->timer == NULL) {
			/* An entry that can never expire must not stay */
			ret = db_hash_delete(lock_log, key.dptr, key.dsize);
			if (ret == 0) {
				talloc_free(entry);
			}
		}

		return false;
	}

	if (ret == EINVAL) {
		/* Malformed record: drop it, result of the delete is moot */
		ret = db_hash_delete(lock_log, key.dptr, key.dsize);
		return false;
	}

	if (ret != ENOENT) {
		return false;
	}

	/* First timeout for this key: create and register an entry */
	entry = talloc_zero(lock_log, struct lock_log_entry);
	if (entry == NULL) {
		return false;
	}

	entry->lock_log = lock_log;

	entry->key.dptr = talloc_memdup(entry, key.dptr, key.dsize);
	if (entry->key.dptr == NULL) {
		goto discard;
	}
	entry->key.dsize = key.dsize;

	entry->log_sec = elapsed_sec;
	entry->timer = tevent_add_timer(ev, entry,
					timeval_current_ofs(30, 0),
					lock_log_cleanup, entry);
	if (entry->timer == NULL) {
		goto discard;
	}

	ret = db_hash_add(lock_log, key.dptr, key.dsize,
			  (uint8_t *)&entry,
			  sizeof(struct lock_log_entry *));
	if (ret != 0) {
		goto discard;
	}

	return false;

discard:
	talloc_free(entry);
	return false;
}
2012-10-11 11:29:29 +11:00
/*
* Callback routine when required locks are not obtained within timeout
* Called from parent context
*/
static void ctdb_lock_timeout_handler ( struct tevent_context * ev ,
struct tevent_timer * ttimer ,
struct timeval current_time ,
void * private_data )
{
2015-03-06 14:05:23 +11:00
static char debug_locks [ PATH_MAX + 1 ] = " " ;
2012-10-11 11:29:29 +11:00
struct lock_context * lock_ctx ;
struct ctdb_context * ctdb ;
2013-07-08 15:46:53 +10:00
pid_t pid ;
2015-03-04 15:36:05 +11:00
double elapsed_time ;
2017-06-07 16:44:24 +10:00
bool skip ;
2017-06-13 15:32:36 +10:00
char * keystr ;
2012-10-11 11:29:29 +11:00
lock_ctx = talloc_get_type_abort ( private_data , struct lock_context ) ;
ctdb = lock_ctx - > ctdb ;
2015-03-04 15:36:05 +11:00
elapsed_time = timeval_elapsed ( & lock_ctx - > start_time ) ;
2017-06-07 16:44:24 +10:00
/* For database locks, always log */
if ( lock_ctx - > type = = LOCK_DB ) {
DEBUG ( DEBUG_WARNING ,
( " Unable to get DB lock on database %s for "
" %.0lf seconds \n " ,
lock_ctx - > ctdb_db - > db_name , elapsed_time ) ) ;
goto lock_debug ;
}
/* For record locks, check if we have already logged */
skip = lock_log_skip ( ev , lock_ctx - > ctdb_db - > lock_log ,
lock_ctx - > key , ( unsigned long ) elapsed_time ) ;
if ( skip ) {
goto skip_lock_debug ;
}
2017-06-13 15:32:36 +10:00
keystr = hex_encode_talloc ( lock_ctx , lock_ctx - > key . dptr ,
lock_ctx - > key . dsize ) ;
2017-06-07 16:45:50 +10:00
DEBUG ( DEBUG_WARNING ,
2017-06-13 15:32:36 +10:00
( " Unable to get RECORD lock on database %s for %.0lf seconds "
" (key %s) \n " ,
lock_ctx - > ctdb_db - > db_name , elapsed_time ,
keystr ? keystr : " " ) ) ;
TALLOC_FREE ( keystr ) ;
2013-07-08 15:46:53 +10:00
2016-09-22 13:58:06 +10:00
/* If a node stopped/banned, don't spam the logs */
if ( ctdb - > nodes [ ctdb - > pnn ] - > flags & NODE_FLAGS_INACTIVE ) {
goto skip_lock_debug ;
}
2017-06-07 16:44:24 +10:00
lock_debug :
2016-09-22 14:06:44 +10:00
2015-03-06 14:05:23 +11:00
if ( ctdb_set_helper ( " lock debugging helper " ,
debug_locks , sizeof ( debug_locks ) ,
" CTDB_DEBUG_LOCKS " ,
getenv ( " CTDB_BASE " ) , " debug_locks.sh " ) ) {
2013-11-19 15:31:39 +11:00
pid = vfork ( ) ;
2013-07-08 15:46:53 +10:00
if ( pid = = 0 ) {
2013-10-01 15:13:29 +10:00
execl ( debug_locks , debug_locks , NULL ) ;
2013-11-19 15:31:39 +11:00
_exit ( 0 ) ;
2013-07-08 15:46:53 +10:00
}
2013-11-19 15:31:39 +11:00
ctdb_track_child ( ctdb , pid ) ;
2013-10-01 15:13:29 +10:00
} else {
DEBUG ( DEBUG_WARNING ,
( __location__
2015-03-06 14:05:23 +11:00
" Unable to setup lock debugging \n " ) ) ;
2012-10-11 11:29:29 +11:00
}
2016-09-22 13:58:06 +10:00
skip_lock_debug :
2012-10-11 11:29:29 +11:00
/* reset the timeout timer */
// talloc_free(lock_ctx->ttimer);
lock_ctx - > ttimer = tevent_add_timer ( ctdb - > ev ,
lock_ctx ,
2017-06-07 16:44:24 +10:00
timeval_current_ofs ( 10 , 0 ) ,
2012-10-11 11:29:29 +11:00
ctdb_lock_timeout_handler ,
( void * ) lock_ctx ) ;
}
2014-08-13 14:46:31 +10:00
/*
 * Build the argument vector for the lock helper
 *
 * Arguments, in order:
 *   0: PID of this process
 *   1: file descriptor the helper reports its status on
 *   2: lock type string (record or database)
 *   3: path of the database
 *   4: tdb flags of the database, in hex
 *   5: (record locks only) hex-encoded key, or a placeholder string when
 *      the key is empty
 * followed by a terminating NULL.
 *
 * The array and all strings are allocated on mem_ctx.  On success the
 * array is returned in *argv and its length in *argc; note that the length
 * counts the terminating NULL slot.
 *
 * Returns false on allocation failure or for an unknown lock type, in
 * which case *argc and *argv are left untouched.
 */
static bool lock_helper_args(TALLOC_CTX *mem_ctx,
			     struct lock_context *lock_ctx, int fd,
			     int *argc, const char ***argv)
{
	const char **args = NULL;
	const char *type_arg = NULL;
	int nargs = 0, i;

	switch (lock_ctx->type) {
	case LOCK_RECORD:
		nargs = 6;
		type_arg = " RECORD ";
		break;

	case LOCK_DB:
		nargs = 5;
		type_arg = " DB ";
		break;

	default:
		/*
		 * Without this the array below would be sized for the
		 * terminator only, yet args[0] and args[1] are written.
		 */
		DEBUG(DEBUG_ERR, ("Unknown lock type %d\n",
				  (int)lock_ctx->type));
		return false;
	}

	/* Add extra argument for null termination */
	nargs++;

	args = talloc_array(mem_ctx, const char *, nargs);
	if (args == NULL) {
		return false;
	}

	/* Arguments common to both lock types */
	args[0] = talloc_asprintf(args, " %d ", getpid());
	args[1] = talloc_asprintf(args, " %d ", fd);
	args[2] = talloc_strdup(args, type_arg);
	args[3] = talloc_strdup(args, lock_ctx->ctdb_db->db_path);
	args[4] = talloc_asprintf(args, " 0x%x ",
				  tdb_get_flags(lock_ctx->ctdb_db->ltdb->tdb));

	if (lock_ctx->type == LOCK_RECORD) {
		if (lock_ctx->key.dsize == 0) {
			args[5] = talloc_strdup(args, " NULL ");
		} else {
			args[5] = hex_encode_talloc(args, lock_ctx->key.dptr,
						    lock_ctx->key.dsize);
		}
	}

	/* Make sure last argument is NULL */
	args[nargs-1] = NULL;

	/* Any failed string allocation above shows up as a NULL slot */
	for (i = 0; i < nargs-1; i++) {
		if (args[i] == NULL) {
			talloc_free(args);
			return false;
		}
	}

	*argc = nargs;
	*argv = args;

	return true;
}
2014-07-24 15:56:41 +10:00
/*
* Find a lock request that can be scheduled
*/
2014-09-04 13:28:34 +10:00
static struct lock_context * ctdb_find_lock_context ( struct ctdb_context * ctdb )
2014-07-24 15:56:41 +10:00
{
struct lock_context * lock_ctx , * next_ctx ;
struct ctdb_db_context * ctdb_db ;
/* First check if there are database lock requests */
2014-08-04 13:57:12 +00:00
for ( lock_ctx = ctdb - > lock_pending ; lock_ctx ! = NULL ;
lock_ctx = next_ctx ) {
if ( lock_ctx - > request ! = NULL ) {
/* Found a lock context with a request */
return lock_ctx ;
2014-07-24 15:56:41 +10:00
}
2014-08-04 13:57:12 +00:00
next_ctx = lock_ctx - > next ;
DEBUG ( DEBUG_INFO , ( " Removing lock context without lock "
" request \n " ) ) ;
DLIST_REMOVE ( ctdb - > lock_pending , lock_ctx ) ;
CTDB_DECREMENT_STAT ( ctdb , locks . num_pending ) ;
if ( lock_ctx - > ctdb_db ) {
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db ,
locks . num_pending ) ;
}
talloc_free ( lock_ctx ) ;
2014-07-24 15:56:41 +10:00
}
/* Next check database queues */
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
2014-08-04 13:57:12 +00:00
if ( ctdb_db - > lock_num_current = =
ctdb - > tunable . lock_processes_per_db ) {
2014-07-24 15:56:41 +10:00
continue ;
}
2014-08-04 13:57:12 +00:00
for ( lock_ctx = ctdb_db - > lock_pending ; lock_ctx ! = NULL ;
lock_ctx = next_ctx ) {
2014-07-24 15:56:41 +10:00
next_ctx = lock_ctx - > next ;
2014-08-04 13:57:12 +00:00
if ( lock_ctx - > request ! = NULL ) {
return lock_ctx ;
}
2014-07-24 15:56:41 +10:00
2014-08-04 13:57:12 +00:00
DEBUG ( DEBUG_INFO , ( " Removing lock context without "
" lock request \n " ) ) ;
DLIST_REMOVE ( ctdb_db - > lock_pending , lock_ctx ) ;
CTDB_DECREMENT_STAT ( ctdb , locks . num_pending ) ;
CTDB_DECREMENT_DB_STAT ( ctdb_db , locks . num_pending ) ;
talloc_free ( lock_ctx ) ;
2014-07-24 15:56:41 +10:00
}
}
return NULL ;
}
2013-04-30 14:32:46 +10:00
2012-10-11 11:29:29 +11:00
/*
* Schedule a new lock child process
* Set up callback handler and timeout handler
*/
static void ctdb_lock_schedule ( struct ctdb_context * ctdb )
{
2014-07-24 15:56:41 +10:00
struct lock_context * lock_ctx ;
2014-08-13 14:46:31 +10:00
int ret , argc ;
2013-04-30 15:07:49 +10:00
TALLOC_CTX * tmp_ctx ;
2015-03-06 14:05:23 +11:00
static char prog [ PATH_MAX + 1 ] = " " ;
2014-08-13 14:46:31 +10:00
const char * * args ;
2012-10-11 11:29:29 +11:00
2015-03-06 14:05:23 +11:00
if ( ! ctdb_set_helper ( " lock helper " ,
prog , sizeof ( prog ) ,
" CTDB_LOCK_HELPER " ,
CTDB_HELPER_BINDIR , " ctdb_lock_helper " ) ) {
ctdb_die ( ctdb , __location__
" Unable to set lock helper \n " ) ;
2013-05-08 13:45:55 +10:00
}
2012-10-11 11:29:29 +11:00
/* Find a lock context with requests */
2014-07-24 15:56:41 +10:00
lock_ctx = ctdb_find_lock_context ( ctdb ) ;
2012-10-11 11:29:29 +11:00
if ( lock_ctx = = NULL ) {
return ;
}
lock_ctx - > child = - 1 ;
ret = pipe ( lock_ctx - > fd ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_ERR , ( " Failed to create pipe in ctdb_lock_schedule \n " ) ) ;
return ;
}
2013-04-30 15:07:49 +10:00
set_close_on_exec ( lock_ctx - > fd [ 0 ] ) ;
/* Create data for child process */
tmp_ctx = talloc_new ( lock_ctx ) ;
if ( tmp_ctx = = NULL ) {
DEBUG ( DEBUG_ERR , ( " Failed to allocate memory for helper args \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
return ;
}
2016-06-17 18:35:18 +10:00
if ( ! ctdb - > do_setsched ) {
ret = setenv ( " CTDB_NOSETSCHED " , " 1 " , 1 ) ;
if ( ret ! = 0 ) {
DEBUG ( DEBUG_WARNING ,
( " Failed to set CTDB_NOSETSCHED variable \n " ) ) ;
}
}
2013-04-30 15:07:49 +10:00
/* Create arguments for lock helper */
2014-08-13 14:46:31 +10:00
if ( ! lock_helper_args ( tmp_ctx , lock_ctx , lock_ctx - > fd [ 1 ] ,
& argc , & args ) ) {
2013-04-30 15:07:49 +10:00
DEBUG ( DEBUG_ERR , ( " Failed to create lock helper args \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
talloc_free ( tmp_ctx ) ;
return ;
}
2016-11-30 12:22:02 +11:00
lock_ctx - > child = ctdb_vfork_exec ( lock_ctx , ctdb , prog , argc ,
( const char * * ) args ) ;
if ( lock_ctx - > child = = - 1 ) {
2012-10-11 11:29:29 +11:00
DEBUG ( DEBUG_ERR , ( " Failed to create a child in ctdb_lock_schedule \n " ) ) ;
close ( lock_ctx - > fd [ 0 ] ) ;
close ( lock_ctx - > fd [ 1 ] ) ;
2013-04-30 15:07:49 +10:00
talloc_free ( tmp_ctx ) ;
2012-10-11 11:29:29 +11:00
return ;
}
/* Parent process */
close ( lock_ctx - > fd [ 1 ] ) ;
2013-04-30 15:07:49 +10:00
talloc_free ( tmp_ctx ) ;
2012-10-11 11:29:29 +11:00
/* Set up timeout handler */
lock_ctx - > ttimer = tevent_add_timer ( ctdb - > ev ,
lock_ctx ,
timeval_current_ofs ( 10 , 0 ) ,
ctdb_lock_timeout_handler ,
( void * ) lock_ctx ) ;
if ( lock_ctx - > ttimer = = NULL ) {
2016-11-29 17:20:45 +11:00
ctdb_kill ( ctdb , lock_ctx - > child , SIGTERM ) ;
2012-10-11 11:29:29 +11:00
lock_ctx - > child = - 1 ;
close ( lock_ctx - > fd [ 0 ] ) ;
return ;
}
/* Set up callback */
lock_ctx - > tfd = tevent_add_fd ( ctdb - > ev ,
lock_ctx ,
lock_ctx - > fd [ 0 ] ,
2015-10-26 16:50:09 +11:00
TEVENT_FD_READ ,
2012-10-11 11:29:29 +11:00
ctdb_lock_handler ,
( void * ) lock_ctx ) ;
if ( lock_ctx - > tfd = = NULL ) {
TALLOC_FREE ( lock_ctx - > ttimer ) ;
2016-11-29 17:20:45 +11:00
ctdb_kill ( ctdb , lock_ctx - > child , SIGTERM ) ;
2012-10-11 11:29:29 +11:00
lock_ctx - > child = - 1 ;
close ( lock_ctx - > fd [ 0 ] ) ;
return ;
}
tevent_fd_set_auto_close ( lock_ctx - > tfd ) ;
/* Move the context from pending to current */
2014-07-24 15:56:41 +10:00
if ( lock_ctx - > type = = LOCK_RECORD ) {
DLIST_REMOVE ( lock_ctx - > ctdb_db - > lock_pending , lock_ctx ) ;
2016-02-05 11:32:18 +01:00
DLIST_ADD_END ( lock_ctx - > ctdb_db - > lock_current , lock_ctx ) ;
2014-07-24 15:56:41 +10:00
} else {
DLIST_REMOVE ( ctdb - > lock_pending , lock_ctx ) ;
2016-02-05 11:32:18 +01:00
DLIST_ADD_END ( ctdb - > lock_current , lock_ctx ) ;
2014-07-24 15:56:41 +10:00
}
2014-07-15 14:38:52 +10:00
CTDB_DECREMENT_STAT ( lock_ctx - > ctdb , locks . num_pending ) ;
2014-07-15 14:38:12 +10:00
CTDB_INCREMENT_STAT ( lock_ctx - > ctdb , locks . num_current ) ;
2013-11-15 15:58:59 +11:00
if ( lock_ctx - > ctdb_db ) {
lock_ctx - > ctdb_db - > lock_num_current + + ;
2014-07-15 14:38:52 +10:00
CTDB_DECREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_pending ) ;
2013-11-15 18:36:09 +11:00
CTDB_INCREMENT_DB_STAT ( lock_ctx - > ctdb_db , locks . num_current ) ;
2013-11-15 15:58:59 +11:00
}
2012-10-11 11:29:29 +11:00
}
/*
* Lock record / db depending on type
*/
2014-08-11 17:08:20 +10:00
static struct lock_request * ctdb_lock_internal ( TALLOC_CTX * mem_ctx ,
struct ctdb_context * ctdb ,
2012-10-11 11:29:29 +11:00
struct ctdb_db_context * ctdb_db ,
TDB_DATA key ,
uint32_t priority ,
void ( * callback ) ( void * , bool ) ,
void * private_data ,
enum lock_type type ,
bool auto_mark )
{
2013-11-18 15:48:22 +11:00
struct lock_context * lock_ctx = NULL ;
2012-10-11 11:29:29 +11:00
struct lock_request * request ;
if ( callback = = NULL ) {
2013-05-01 12:55:22 +10:00
DEBUG ( DEBUG_WARNING , ( " No callback function specified, not locking \n " ) ) ;
2012-10-11 11:29:29 +11:00
return NULL ;
}
2014-05-30 15:36:28 +10:00
lock_ctx = talloc_zero ( ctdb , struct lock_context ) ;
2012-10-11 11:29:29 +11:00
if ( lock_ctx = = NULL ) {
2014-05-30 15:36:28 +10:00
DEBUG ( DEBUG_ERR , ( " Failed to create a new lock context \n " ) ) ;
return NULL ;
}
2012-10-11 11:29:29 +11:00
2014-08-11 17:08:20 +10:00
if ( ( request = talloc_zero ( mem_ctx , struct lock_request ) ) = = NULL ) {
2014-05-30 15:49:46 +10:00
talloc_free ( lock_ctx ) ;
return NULL ;
}
2014-05-30 15:36:28 +10:00
lock_ctx - > type = type ;
lock_ctx - > ctdb = ctdb ;
lock_ctx - > ctdb_db = ctdb_db ;
lock_ctx - > key . dsize = key . dsize ;
if ( key . dsize > 0 ) {
lock_ctx - > key . dptr = talloc_memdup ( lock_ctx , key . dptr , key . dsize ) ;
if ( lock_ctx - > key . dptr = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " Memory allocation error \n " ) ) ;
talloc_free ( lock_ctx ) ;
2015-06-02 11:15:11 +10:00
talloc_free ( request ) ;
2014-05-30 15:36:28 +10:00
return NULL ;
2012-10-11 11:29:29 +11:00
}
2014-05-30 15:36:28 +10:00
lock_ctx - > key_hash = ctdb_hash ( & key ) ;
} else {
lock_ctx - > key . dptr = NULL ;
}
lock_ctx - > priority = priority ;
lock_ctx - > auto_mark = auto_mark ;
2012-10-11 11:29:29 +11:00
2014-05-30 15:49:46 +10:00
lock_ctx - > request = request ;
2014-05-30 15:36:28 +10:00
lock_ctx - > child = - 1 ;
2012-10-11 11:29:29 +11:00
2014-07-15 14:13:25 +10:00
/* Non-record locks are required by recovery and should be scheduled
* immediately , so keep them at the head of the pending queue .
*/
if ( lock_ctx - > type = = LOCK_RECORD ) {
2016-02-05 11:32:18 +01:00
DLIST_ADD_END ( ctdb_db - > lock_pending , lock_ctx ) ;
2014-07-15 14:13:25 +10:00
} else {
2016-02-05 11:32:18 +01:00
DLIST_ADD_END ( ctdb - > lock_pending , lock_ctx ) ;
2014-07-15 14:13:25 +10:00
}
2014-05-30 15:36:28 +10:00
CTDB_INCREMENT_STAT ( ctdb , locks . num_pending ) ;
if ( ctdb_db ) {
CTDB_INCREMENT_DB_STAT ( ctdb_db , locks . num_pending ) ;
2012-10-11 11:29:29 +11:00
}
2014-05-30 15:36:28 +10:00
/* Start the timer when we activate the context */
lock_ctx - > start_time = timeval_current ( ) ;
2012-10-11 11:29:29 +11:00
request - > lctx = lock_ctx ;
request - > callback = callback ;
request - > private_data = private_data ;
talloc_set_destructor ( request , ctdb_lock_request_destructor ) ;
2015-06-02 00:22:07 +10:00
talloc_set_destructor ( lock_ctx , ctdb_lock_context_destructor ) ;
2012-10-11 11:29:29 +11:00
ctdb_lock_schedule ( ctdb ) ;
return request ;
}
/*
* obtain a lock on a record in a database
*/
2014-08-11 17:08:20 +10:00
struct lock_request * ctdb_lock_record ( TALLOC_CTX * mem_ctx ,
struct ctdb_db_context * ctdb_db ,
2012-10-11 11:29:29 +11:00
TDB_DATA key ,
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2014-08-11 17:08:20 +10:00
return ctdb_lock_internal ( mem_ctx ,
ctdb_db - > ctdb ,
2012-10-11 11:29:29 +11:00
ctdb_db ,
key ,
0 ,
callback ,
private_data ,
LOCK_RECORD ,
auto_mark ) ;
}
/*
* obtain a lock on a database
*/
2014-08-11 17:08:20 +10:00
struct lock_request * ctdb_lock_db ( TALLOC_CTX * mem_ctx ,
struct ctdb_db_context * ctdb_db ,
2012-10-11 11:29:29 +11:00
bool auto_mark ,
void ( * callback ) ( void * , bool ) ,
void * private_data )
{
2014-08-11 17:08:20 +10:00
return ctdb_lock_internal ( mem_ctx ,
ctdb_db - > ctdb ,
2012-10-11 11:29:29 +11:00
ctdb_db ,
tdb_null ,
0 ,
callback ,
private_data ,
LOCK_DB ,
auto_mark ) ;
}