2007-05-03 06:16:03 +04:00
/*
efficient async ctdb traverse
Copyright ( C ) Andrew Tridgell 2007
2007-05-31 07:50:53 +04:00
This program is free software ; you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
2007-07-10 09:29:31 +04:00
the Free Software Foundation ; either version 3 of the License , or
2007-05-31 07:50:53 +04:00
( at your option ) any later version .
This program is distributed in the hope that it will be useful ,
2007-05-03 06:16:03 +04:00
but WITHOUT ANY WARRANTY ; without even the implied warranty of
2007-05-31 07:50:53 +04:00
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
2007-07-10 09:29:31 +04:00
along with this program ; if not , see < http : //www.gnu.org/licenses/>.
2007-05-03 06:16:03 +04:00
*/
2015-10-26 08:50:46 +03:00
# include "replace.h"
2007-05-03 06:16:03 +04:00
# include "system/filesys.h"
2015-10-26 08:50:46 +03:00
# include "system/network.h"
2007-05-03 06:16:03 +04:00
# include "system/wait.h"
2015-10-26 08:50:46 +03:00
# include "system/time.h"
# include <talloc.h>
# include <tevent.h>
2014-08-15 09:46:33 +04:00
# include "lib/tdb_wrap/tdb_wrap.h"
2009-05-06 01:32:25 +04:00
# include "lib/util/dlinklist.h"
2015-10-26 08:50:46 +03:00
# include "lib/util/debug.h"
# include "lib/util/samba_util.h"
2015-09-24 02:10:59 +03:00
# include "lib/util/util_process.h"
2015-10-26 08:50:46 +03:00
# include "ctdb_private.h"
# include "ctdb_client.h"
2015-03-17 06:30:18 +03:00
# include "common/reqid.h"
2015-10-23 06:11:53 +03:00
# include "common/system.h"
2015-10-23 06:17:34 +03:00
# include "common/common.h"
2015-11-11 07:41:10 +03:00
# include "common/logging.h"
2007-05-03 06:16:03 +04:00
/*
  Callback type used by the traverse code in this file.
  It is handed the caller's private_data together with the key and data
  of one record. The traverse code signals "traverse finished" by
  invoking it with tdb_null for both key and data.
*/
typedef void ( * ctdb_traverse_fn_t ) ( void * private_data , TDB_DATA key , TDB_DATA data ) ;
/*
handle returned to caller - freeing this handle will kill the child and
terminate the traverse (see traverse_local_destructor)
*/
2007-05-03 11:12:23 +04:00
struct ctdb_traverse_local_handle {
2009-05-06 01:32:25 +04:00
/* links for the per-database ctdb_db->traverse list */
struct ctdb_traverse_local_handle * next , * prev ;
2007-05-03 06:16:03 +04:00
/* database being traversed */
struct ctdb_db_context * ctdb_db ;
/* pipe from child to parent: parent reads fd[0], child writes fd[1] */
int fd [ 2 ] ;
/* pid of the forked traverse child */
pid_t child ;
2009-05-06 01:32:25 +04:00
/* srvid and client reqid; together they identify the traverse when a
   TRAVERSE_KILL control arrives */
uint64_t srvid ;
uint32_t client_reqid ;
2013-06-06 10:12:07 +04:00
/* reqid stamped into every record marshalled by the child */
uint32_t reqid ;
/* node the child sends its TRAVERSE_DATA controls to */
int srcnode ;
2007-05-03 06:16:03 +04:00
/* completion callback and its argument */
void * private_data ;
ctdb_traverse_fn_t callback ;
2011-11-28 02:16:33 +04:00
/* when false, records holding nothing beyond the ltdb header are
   skipped on non-persistent databases */
bool withemptyrecords ;
2013-06-06 10:26:25 +04:00
/* fd event watching fd[0] for the child's result */
struct tevent_fd * fde ;
2013-09-06 12:11:40 +04:00
/* per-record counters maintained in the child */
int records_failed ;
int records_sent ;
2007-05-03 06:16:03 +04:00
} ;
/*
2013-06-06 10:26:25 +04:00
* called when traverse is completed by child or on error
2007-05-03 06:16:03 +04:00
*/
2013-06-06 10:26:25 +04:00
static void ctdb_traverse_child_handler ( struct tevent_context * ev , struct tevent_fd * fde ,
uint16_t flags , void * private_data )
2007-05-03 06:16:03 +04:00
{
2013-06-06 10:26:25 +04:00
struct ctdb_traverse_local_handle * h = talloc_get_type ( private_data ,
struct ctdb_traverse_local_handle ) ;
2007-05-03 06:16:03 +04:00
ctdb_traverse_fn_t callback = h - > callback ;
void * p = h - > private_data ;
2013-09-06 12:11:40 +04:00
int res ;
ssize_t n ;
/* Read the number of records sent by traverse child */
2014-07-30 15:03:53 +04:00
n = sys_read ( h - > fd [ 0 ] , & res , sizeof ( res ) ) ;
2013-09-06 12:11:40 +04:00
if ( n < 0 | | n ! = sizeof ( res ) ) {
/* Traverse child failed */
DEBUG ( DEBUG_ERR , ( " Local traverse failed db:%s reqid:%d \n " ,
h - > ctdb_db - > db_name , h - > reqid ) ) ;
} else if ( res < 0 ) {
/* Traverse failed */
res = - res ;
DEBUG ( DEBUG_ERR , ( " Local traverse failed db:%s reqid:%d records:%d \n " ,
h - > ctdb_db - > db_name , h - > reqid , res ) ) ;
} else {
DEBUG ( DEBUG_INFO , ( " Local traverse end db:%s reqid:%d records:%d \n " ,
h - > ctdb_db - > db_name , h - > reqid , res ) ) ;
}
2007-05-03 06:16:03 +04:00
2013-06-06 10:26:25 +04:00
callback ( p , tdb_null , tdb_null ) ;
2007-05-03 06:16:03 +04:00
}
/*
destroy a in - flight traverse operation
*/
2007-05-03 11:12:23 +04:00
static int traverse_local_destructor ( struct ctdb_traverse_local_handle * h )
2007-05-03 06:16:03 +04:00
{
2009-05-06 01:32:25 +04:00
DLIST_REMOVE ( h - > ctdb_db - > traverse , h ) ;
2012-05-03 05:42:41 +04:00
ctdb_kill ( h - > ctdb_db - > ctdb , h - > child , SIGKILL ) ;
2007-05-03 06:16:03 +04:00
return 0 ;
}
2007-05-03 11:12:23 +04:00
/*
  callback from tdb_traverse_read(), run in the traverse child for each
  record of the local tdb.

  On non-persistent databases a record is skipped (return 0) when it is
  too short to hold an ltdb header, when it is empty and the traverse was
  not started with withemptyrecords, or when this node is not its
  dmaster. Every other record is marshalled and sent to h->srcnode as a
  CTDB_CONTROL_TRAVERSE_DATA control.

  Returns 0 to continue the traverse, -1 (after bumping
  h->records_failed) when marshalling or sending fails.
*/
static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	struct ctdb_traverse_local_handle *h = talloc_get_type(p,
							       struct ctdb_traverse_local_handle);
	struct ctdb_rec_data_old *d;
	int res, status;
	TDB_DATA outdata;

	if (h->ctdb_db->persistent == 0) {
		const struct ctdb_ltdb_header *hdr;

		/* A record shorter than the ltdb header has no dmaster field
		 * to inspect; reading it would run past the end of the
		 * record, so skip it regardless of withemptyrecords. */
		if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
			return 0;
		}

		/* filter out zero-length records */
		if (!h->withemptyrecords &&
		    data.dsize == sizeof(struct ctdb_ltdb_header)) {
			return 0;
		}

		/* filter out non-authoritative records */
		hdr = (const struct ctdb_ltdb_header *)data.dptr;
		if (hdr->dmaster != h->ctdb_db->ctdb->pnn) {
			return 0;
		}
	}

	d = ctdb_marshall_record(h, h->reqid, key, NULL, data);
	if (d == NULL) {
		/* error handling is tricky in this child code .... */
		h->records_failed++;
		return -1;
	}

	outdata.dptr = (uint8_t *)d;
	outdata.dsize = d->length;

	res = ctdb_control(h->ctdb_db->ctdb, h->srcnode, 0, CTDB_CONTROL_TRAVERSE_DATA,
			   CTDB_CTRL_FLAG_NOREPLY, outdata, NULL, NULL, &status, NULL, NULL);
	if (res != 0 || status != 0) {
		h->records_failed++;
		return -1;
	}

	h->records_sent++;
	return 0;
}
2009-05-06 01:32:25 +04:00
/*
  state for one local traverse started on behalf of a TRAVERSE_ALL or
  TRAVERSE_ALL_EXT control; it is the talloc parent of the local
  traverse handle and is freed when that traverse ends
*/
struct traverse_all_state {
struct ctdb_context * ctdb ;
/* handle returned by ctdb_traverse_local() */
struct ctdb_traverse_local_handle * h ;
/* reqid taken from the incoming control */
uint32_t reqid ;
/* pnn taken from the incoming control; records are sent back to it */
uint32_t srcnode ;
/* client reqid and srvid taken from the incoming control */
uint32_t client_reqid ;
uint64_t srvid ;
2011-11-28 02:16:33 +04:00
/* whether empty records are included in the traverse */
bool withemptyrecords ;
2009-05-06 01:32:25 +04:00
} ;
2007-05-03 11:12:23 +04:00
2007-05-03 06:16:03 +04:00
/*
2007-05-03 11:12:23 +04:00
setup a non - blocking traverse of a local ltdb . The callback function
will be called on every record in the local ltdb . To stop the
2011-11-01 17:34:35 +04:00
traverse , talloc_free ( ) the traverse_handle .
2007-05-03 11:12:23 +04:00
The traverse is finished when the callback is called with tdb_null for key and data
2007-05-03 06:16:03 +04:00
*/
2007-06-05 11:57:07 +04:00
static struct ctdb_traverse_local_handle * ctdb_traverse_local ( struct ctdb_db_context * ctdb_db ,
ctdb_traverse_fn_t callback ,
2009-05-06 01:32:25 +04:00
struct traverse_all_state * all_state )
2007-05-03 06:16:03 +04:00
{
2007-05-03 11:12:23 +04:00
struct ctdb_traverse_local_handle * h ;
2007-05-03 06:16:03 +04:00
int ret ;
2009-05-06 01:32:25 +04:00
h = talloc_zero ( all_state , struct ctdb_traverse_local_handle ) ;
2007-05-03 11:12:23 +04:00
if ( h = = NULL ) {
2007-05-03 06:16:03 +04:00
return NULL ;
}
ret = pipe ( h - > fd ) ;
if ( ret ! = 0 ) {
talloc_free ( h ) ;
return NULL ;
}
2011-01-10 05:57:49 +03:00
h - > child = ctdb_fork ( ctdb_db - > ctdb ) ;
2007-05-03 06:16:03 +04:00
if ( h - > child = = ( pid_t ) - 1 ) {
close ( h - > fd [ 0 ] ) ;
close ( h - > fd [ 1 ] ) ;
talloc_free ( h ) ;
return NULL ;
}
h - > callback = callback ;
2009-05-06 01:32:25 +04:00
h - > private_data = all_state ;
2007-05-03 06:16:03 +04:00
h - > ctdb_db = ctdb_db ;
2009-05-06 01:32:25 +04:00
h - > client_reqid = all_state - > client_reqid ;
2013-06-06 10:12:07 +04:00
h - > reqid = all_state - > reqid ;
2009-05-06 01:32:25 +04:00
h - > srvid = all_state - > srvid ;
2013-06-06 10:12:07 +04:00
h - > srcnode = all_state - > srcnode ;
2011-11-28 02:16:33 +04:00
h - > withemptyrecords = all_state - > withemptyrecords ;
2007-05-03 06:16:03 +04:00
if ( h - > child = = 0 ) {
/* start the traverse in the child */
2013-09-09 06:46:26 +04:00
int res , status ;
2013-06-06 10:26:25 +04:00
pid_t parent = getpid ( ) ;
2013-09-13 07:28:31 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2015-10-29 09:30:30 +03:00
struct ctdb_rec_data_old * d ;
2013-09-09 06:46:26 +04:00
TDB_DATA outdata ;
2013-06-06 10:26:25 +04:00
2007-05-03 06:16:03 +04:00
close ( h - > fd [ 0 ] ) ;
2013-06-06 10:26:25 +04:00
2015-09-24 02:10:59 +03:00
prctl_set_comment ( " ctdb_traverse " ) ;
2013-09-13 07:28:31 +04:00
if ( switch_from_server_to_client ( ctdb , " traverse_local-%s: " ,
2013-06-06 10:26:25 +04:00
ctdb_db - > db_name ) ! = 0 ) {
DEBUG ( DEBUG_CRIT , ( " Failed to switch traverse child into client mode \n " ) ) ;
2013-09-06 12:11:40 +04:00
_exit ( 0 ) ;
2013-06-06 10:26:25 +04:00
}
2013-09-09 06:46:26 +04:00
d = ctdb_marshall_record ( h , h - > reqid , tdb_null , NULL , tdb_null ) ;
if ( d = = NULL ) {
res = 0 ;
2014-07-30 15:03:53 +04:00
sys_write ( h - > fd [ 1 ] , & res , sizeof ( int ) ) ;
2013-09-09 06:46:26 +04:00
_exit ( 0 ) ;
}
2013-09-06 12:11:40 +04:00
res = tdb_traverse_read ( ctdb_db - > ltdb - > tdb , ctdb_traverse_local_fn , h ) ;
if ( res = = - 1 | | h - > records_failed > 0 ) {
/* traverse failed */
res = - ( h - > records_sent ) ;
} else {
res = h - > records_sent ;
2013-06-06 10:26:25 +04:00
}
2013-09-10 11:52:26 +04:00
/* Wait till all the data is flushed from output queue */
while ( ctdb_queue_length ( ctdb - > daemon . queue ) > 0 ) {
tevent_loop_once ( ctdb - > ev ) ;
}
2013-09-09 06:46:26 +04:00
/* End traverse by sending empty record */
outdata . dptr = ( uint8_t * ) d ;
outdata . dsize = d - > length ;
ret = ctdb_control ( ctdb , h - > srcnode , 0 ,
CTDB_CONTROL_TRAVERSE_DATA ,
CTDB_CTRL_FLAG_NOREPLY , outdata ,
NULL , NULL , & status , NULL , NULL ) ;
if ( ret = = - 1 | | status = = - 1 ) {
if ( res > 0 ) {
res = - res ;
}
}
2014-07-30 15:03:53 +04:00
sys_write ( h - > fd [ 1 ] , & res , sizeof ( res ) ) ;
2013-06-06 10:26:25 +04:00
2013-09-13 07:28:31 +04:00
while ( ctdb_kill ( ctdb , parent , 0 ) = = 0 | | errno ! = ESRCH ) {
2013-06-06 10:26:25 +04:00
sleep ( 5 ) ;
}
2007-05-03 06:16:03 +04:00
_exit ( 0 ) ;
}
close ( h - > fd [ 1 ] ) ;
2009-10-15 04:24:54 +04:00
set_close_on_exec ( h - > fd [ 0 ] ) ;
2007-05-03 11:12:23 +04:00
talloc_set_destructor ( h , traverse_local_destructor ) ;
2007-05-03 06:16:03 +04:00
2009-05-06 01:32:25 +04:00
DLIST_ADD ( ctdb_db - > traverse , h ) ;
2015-10-26 08:50:09 +03:00
h - > fde = tevent_add_fd ( ctdb_db - > ctdb - > ev , h , h - > fd [ 0 ] , TEVENT_FD_READ ,
2013-06-06 10:26:25 +04:00
ctdb_traverse_child_handler , h ) ;
if ( h - > fde = = NULL ) {
close ( h - > fd [ 0 ] ) ;
2007-05-03 06:16:03 +04:00
talloc_free ( h ) ;
return NULL ;
}
2013-06-06 10:26:25 +04:00
tevent_fd_set_auto_close ( h - > fde ) ;
2007-05-03 06:16:03 +04:00
return h ;
}
2007-05-03 11:12:23 +04:00
/*
  handle for one cluster-wide traverse driven from this node
*/
struct ctdb_traverse_all_handle {
struct ctdb_context * ctdb ;
2008-07-16 06:23:18 +04:00
/* database being traversed */
struct ctdb_db_context * ctdb_db ;
2007-05-03 11:12:23 +04:00
/* id obtained from reqid_new(); incoming TRAVERSE_DATA records are
   matched to this handle through it */
uint32_t reqid ;
/* invoked for each record received, and with tdb_null/tdb_null at the end */
ctdb_traverse_fn_t callback ;
void * private_data ;
/* number of empty (end-of-traverse) records received so far */
uint32_t null_count ;
2013-01-22 06:27:20 +04:00
/* set by the timeout handler before it invokes the callback */
bool timedout ;
2007-05-03 11:12:23 +04:00
} ;
/*
destroy a traverse_all op
*/
static int ctdb_traverse_all_destructor ( struct ctdb_traverse_all_handle * state )
{
2015-03-17 06:30:18 +03:00
reqid_remove ( state - > ctdb - > idr , state - > reqid ) ;
2007-05-03 11:12:23 +04:00
return 0 ;
}
2007-05-10 08:06:48 +04:00
/* called when a traverse times out */
2015-10-26 08:50:09 +03:00
static void ctdb_traverse_all_timeout ( struct tevent_context * ev ,
struct tevent_timer * te ,
2007-05-10 08:06:48 +04:00
struct timeval t , void * private_data )
{
struct ctdb_traverse_all_handle * state = talloc_get_type ( private_data , struct ctdb_traverse_all_handle ) ;
2009-05-05 10:33:21 +04:00
DEBUG ( DEBUG_ERR , ( __location__ " Traverse all timeout on database:%s \n " , state - > ctdb_db - > db_name ) ) ;
2010-09-29 04:38:41 +04:00
CTDB_INCREMENT_STAT ( state - > ctdb , timeouts . traverse ) ;
2007-05-10 08:06:48 +04:00
2013-01-22 06:27:20 +04:00
state - > timedout = true ;
2007-05-10 08:06:48 +04:00
state - > callback ( state - > private_data , tdb_null , tdb_null ) ;
}
2009-05-06 01:32:25 +04:00
/*
  state for one traverse requested by a client of this daemon; allocated
  as a talloc child of the client
*/
struct traverse_start_state {
struct ctdb_context * ctdb ;
/* handle returned by ctdb_daemon_traverse_all() */
struct ctdb_traverse_all_handle * h ;
/* srcnode of the control that started the traverse */
uint32_t srcnode ;
/* reqid, database id and srvid taken from the client's request;
   records are dispatched to srvid, stamped with reqid */
uint32_t reqid ;
uint32_t db_id ;
uint64_t srvid ;
2011-11-28 02:16:33 +04:00
/* whether empty records are included in the traverse */
bool withemptyrecords ;
2013-09-06 08:51:54 +04:00
/* count of non-empty records passed on to the client */
int num_records ;
2009-05-06 01:32:25 +04:00
} ;
2007-05-03 11:12:23 +04:00
/*
setup a cluster - wide non - blocking traverse of a ctdb . The
callback function will be called on every record in the local
2013-02-06 13:28:37 +04:00
ltdb . To stop the traverse , talloc_free ( ) the traverse_handle .
2007-05-03 11:12:23 +04:00
The traverse is finished when the callback is called with tdb_null
for key and data
*/
2007-06-05 11:57:07 +04:00
static struct ctdb_traverse_all_handle * ctdb_daemon_traverse_all ( struct ctdb_db_context * ctdb_db ,
ctdb_traverse_fn_t callback ,
2009-05-06 01:32:25 +04:00
struct traverse_start_state * start_state )
2007-05-03 11:12:23 +04:00
{
struct ctdb_traverse_all_handle * state ;
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
int ret ;
TDB_DATA data ;
struct ctdb_traverse_all r ;
2013-04-11 07:20:09 +04:00
struct ctdb_traverse_all_ext r_ext ;
2008-07-16 06:23:18 +04:00
uint32_t destination ;
2007-05-03 11:12:23 +04:00
2009-05-05 10:33:21 +04:00
state = talloc ( start_state , struct ctdb_traverse_all_handle ) ;
2007-05-03 11:12:23 +04:00
if ( state = = NULL ) {
return NULL ;
}
2008-07-16 06:23:18 +04:00
state - > ctdb = ctdb ;
state - > ctdb_db = ctdb_db ;
2015-03-17 06:30:18 +03:00
state - > reqid = reqid_new ( ctdb_db - > ctdb - > idr , state ) ;
2008-07-16 06:23:18 +04:00
state - > callback = callback ;
2009-05-06 01:32:25 +04:00
state - > private_data = start_state ;
2008-07-16 06:23:18 +04:00
state - > null_count = 0 ;
2013-01-22 06:27:20 +04:00
state - > timedout = false ;
2007-05-03 11:12:23 +04:00
talloc_set_destructor ( state , ctdb_traverse_all_destructor ) ;
2013-04-11 07:20:09 +04:00
if ( start_state - > withemptyrecords ) {
r_ext . db_id = ctdb_db - > db_id ;
r_ext . reqid = state - > reqid ;
r_ext . pnn = ctdb - > pnn ;
r_ext . client_reqid = start_state - > reqid ;
r_ext . srvid = start_state - > srvid ;
r_ext . withemptyrecords = start_state - > withemptyrecords ;
2007-05-03 11:12:23 +04:00
2013-04-11 07:20:09 +04:00
data . dptr = ( uint8_t * ) & r_ext ;
data . dsize = sizeof ( r_ext ) ;
} else {
r . db_id = ctdb_db - > db_id ;
r . reqid = state - > reqid ;
r . pnn = ctdb - > pnn ;
r . client_reqid = start_state - > reqid ;
r . srvid = start_state - > srvid ;
data . dptr = ( uint8_t * ) & r ;
data . dsize = sizeof ( r ) ;
}
2007-05-03 11:12:23 +04:00
2008-07-16 06:23:18 +04:00
if ( ctdb_db - > persistent = = 0 ) {
/* normal database, traverse all nodes */
destination = CTDB_BROADCAST_VNNMAP ;
} else {
int i ;
/* persistent database, traverse one node, preferably
* the local one
*/
destination = ctdb - > pnn ;
/* check we are in the vnnmap */
for ( i = 0 ; i < ctdb - > vnn_map - > size ; i + + ) {
if ( ctdb - > vnn_map - > map [ i ] = = ctdb - > pnn ) {
break ;
}
}
/* if we are not in the vnn map we just pick the first
* node instead
*/
if ( i = = ctdb - > vnn_map - > size ) {
destination = ctdb - > vnn_map - > map [ 0 ] ;
}
}
/* tell all the nodes in the cluster to start sending records to this
* node , or if it is a persistent database , just tell the local
* node
*/
2013-04-11 07:20:09 +04:00
if ( start_state - > withemptyrecords ) {
ret = ctdb_daemon_send_control ( ctdb , destination , 0 ,
CTDB_CONTROL_TRAVERSE_ALL_EXT ,
0 , CTDB_CTRL_FLAG_NOREPLY , data , NULL , NULL ) ;
} else {
ret = ctdb_daemon_send_control ( ctdb , destination , 0 ,
CTDB_CONTROL_TRAVERSE_ALL ,
0 , CTDB_CTRL_FLAG_NOREPLY , data , NULL , NULL ) ;
}
2008-07-16 06:23:18 +04:00
2007-05-03 11:12:23 +04:00
if ( ret ! = 0 ) {
talloc_free ( state ) ;
return NULL ;
}
2013-09-06 08:51:54 +04:00
DEBUG ( DEBUG_NOTICE , ( " Starting traverse on DB %s (id %d) \n " ,
ctdb_db - > db_name , state - > reqid ) ) ;
2007-05-10 08:06:48 +04:00
/* timeout the traverse */
2015-10-26 08:50:09 +03:00
tevent_add_timer ( ctdb - > ev , state ,
timeval_current_ofs ( ctdb - > tunable . traverse_timeout , 0 ) ,
ctdb_traverse_all_timeout , state ) ;
2007-05-10 08:06:48 +04:00
2007-05-03 11:12:23 +04:00
return state ;
}
/*
  called when a local traverse ends.

  The key and data arguments are ignored. All that is left to do is to
  release the traverse_all_state; the local traverse handle is a talloc
  child of it and is released along with it.
*/
static void traverse_all_callback(void *p, TDB_DATA key, TDB_DATA data)
{
	/* we're done */
	talloc_free(talloc_get_type(p, struct traverse_all_state));
}
2013-04-11 07:18:36 +04:00
/*
 * called when a CTDB_CONTROL_TRAVERSE_ALL_EXT control comes in: the
 * extended version of ctdb_control_traverse_all that also carries the
 * "withemptyrecords" parameter.
 *
 * Starts a traverse of the local ltdb named in the request. Returns 0
 * once the traverse has been started, -1 if the request has the wrong
 * size, the database is unknown, the database is unhealthy and unhealthy
 * reads are not allowed, or the traverse could not be set up.
 */
int32_t ctdb_control_traverse_all_ext(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
{
	struct ctdb_traverse_all_ext *req;
	struct traverse_all_state *all_state;
	struct ctdb_db_context *db;

	if (data.dsize != sizeof(struct ctdb_traverse_all_ext)) {
		DEBUG(DEBUG_ERR, (__location__ " Invalid size in ctdb_control_traverse_all_ext \n "));
		return -1;
	}
	req = (struct ctdb_traverse_all_ext *)data.dptr;

	db = find_ctdb_db(ctdb, req->db_id);
	if (db == NULL) {
		return -1;
	}

	if (db->unhealthy_reason != NULL) {
		bool read_allowed = (ctdb->tunable.allow_unhealthy_db_read != 0);

		if (!read_allowed) {
			DEBUG(DEBUG_ERR, (" db(%s) unhealty in ctdb_control_traverse_all: %s \n ",
					  db->db_name, db->unhealthy_reason));
			return -1;
		}
		/* reads of unhealthy databases are permitted: warn and carry on */
		DEBUG(DEBUG_WARNING, (" warn: db(%s) unhealty in ctdb_control_traverse_all: %s \n ",
				      db->db_name, db->unhealthy_reason));
	}

	all_state = talloc(db, struct traverse_all_state);
	if (all_state == NULL) {
		return -1;
	}

	all_state->ctdb = ctdb;
	all_state->srcnode = req->pnn;
	all_state->reqid = req->reqid;
	all_state->client_reqid = req->client_reqid;
	all_state->srvid = req->srvid;
	all_state->withemptyrecords = req->withemptyrecords;

	all_state->h = ctdb_traverse_local(db, traverse_all_callback, all_state);
	if (all_state->h == NULL) {
		talloc_free(all_state);
		return -1;
	}

	return 0;
}
2007-05-03 11:12:23 +04:00
/*
  called when a CTDB_CONTROL_TRAVERSE_ALL control comes in. We then
  setup a traverse of our local ltdb, sending the records as
  CTDB_CONTROL_TRAVERSE_DATA records back to the originator.
  Empty records are never included by this variant.

  Returns 0 once the traverse has been started, -1 if the request has
  the wrong size, the database is unknown, the database is unhealthy and
  unhealthy reads are not allowed, or the traverse could not be set up.
*/
int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
{
	struct ctdb_traverse_all *req;
	struct traverse_all_state *all_state;
	struct ctdb_db_context *db;

	if (data.dsize != sizeof(struct ctdb_traverse_all)) {
		DEBUG(DEBUG_ERR, (__location__ " Invalid size in ctdb_control_traverse_all \n "));
		return -1;
	}
	req = (struct ctdb_traverse_all *)data.dptr;

	db = find_ctdb_db(ctdb, req->db_id);
	if (db == NULL) {
		return -1;
	}

	if (db->unhealthy_reason != NULL) {
		bool read_allowed = (ctdb->tunable.allow_unhealthy_db_read != 0);

		if (!read_allowed) {
			DEBUG(DEBUG_ERR, (" db(%s) unhealty in ctdb_control_traverse_all: %s \n ",
					  db->db_name, db->unhealthy_reason));
			return -1;
		}
		/* reads of unhealthy databases are permitted: warn and carry on */
		DEBUG(DEBUG_WARNING, (" warn: db(%s) unhealty in ctdb_control_traverse_all: %s \n ",
				      db->db_name, db->unhealthy_reason));
	}

	all_state = talloc(db, struct traverse_all_state);
	if (all_state == NULL) {
		return -1;
	}

	all_state->ctdb = ctdb;
	all_state->srcnode = req->pnn;
	all_state->reqid = req->reqid;
	all_state->client_reqid = req->client_reqid;
	all_state->srvid = req->srvid;
	all_state->withemptyrecords = false;

	all_state->h = ctdb_traverse_local(db, traverse_all_callback, all_state);
	if (all_state->h == NULL) {
		talloc_free(all_state);
		return -1;
	}

	return 0;
}
/*
  called when a CTDB_CONTROL_TRAVERSE_DATA control comes in. We then
  call the traverse_all callback with the record.

  The record is validated before any of its fields are used: the buffer
  must match d->length, must be large enough to contain the fixed part
  of the record, and keylen + datalen must fit in what follows it.

  An empty record (no key, no data) marks the end of one node's
  traverse. For non-persistent databases the callback only sees the end
  marker once one has arrived for every active node; persistent
  databases are scanned on a single node, so the first end marker is
  passed straight through.

  Returns 0 on success, -1 for a malformed record or when no traverse
  with the record's reqid exists (any more).
*/
int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
{
	struct ctdb_rec_data_old *d = (struct ctdb_rec_data_old *)data.dptr;
	struct ctdb_traverse_all_handle *state;
	TDB_DATA key;
	ctdb_traverse_fn_t callback;
	void *private_data;
	bool size_ok;

	size_ok = (data.dsize >= sizeof(uint32_t)) && (data.dsize == d->length);

	if (size_ok) {
		/* size of the fixed part of the record, i.e. the offset of
		 * data[] within it */
		size_t header_len = (size_t)((uint8_t *)&d->data[0] - (uint8_t *)d);

		if (data.dsize < header_len) {
			/* reqid/keylen/datalen would lie outside the buffer */
			size_ok = false;
		} else {
			size_t payload_len = data.dsize - header_len;

			/* key and data must both fit behind the header;
			 * written so that the sum cannot overflow */
			size_ok = (d->keylen <= payload_len) &&
				  (d->datalen <= payload_len - d->keylen);
		}
	}

	if (!size_ok) {
		DEBUG(DEBUG_ERR, (" Bad record size in ctdb_control_traverse_data \n "));
		return -1;
	}

	state = reqid_find(ctdb->idr, d->reqid, struct ctdb_traverse_all_handle);
	if (state == NULL || d->reqid != state->reqid) {
		/* traverse might have been terminated already */
		return -1;
	}

	key.dsize = d->keylen;
	key.dptr = &d->data[0];
	data.dsize = d->datalen;
	data.dptr = &d->data[d->keylen];

	if (key.dsize == 0 && data.dsize == 0) {
		state->null_count++;
		/* Persistent databases are only scanned on one node (the local
		 * node)
		 */
		if (state->ctdb_db->persistent == 0) {
			if (state->null_count != ctdb_get_num_active_nodes(ctdb)) {
				return 0;
			}
		}
	}

	callback = state->callback;
	private_data = state->private_data;

	callback(private_data, key, data);
	return 0;
}
2009-05-06 01:32:25 +04:00
/*
  kill an in-progress traverse, used when a client disconnects.

  The request names a database, a client reqid and a srvid; the first
  local traverse on that database matching both reqid and srvid is freed,
  which kills its child. Not finding a matching traverse is not an error.

  Returns 0 on success, -1 if the request is too short or the database
  is unknown.
*/
int32_t ctdb_control_traverse_kill(struct ctdb_context *ctdb, TDB_DATA data,
				   TDB_DATA *outdata, uint32_t srcnode)
{
	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
	struct ctdb_db_context *ctdb_db;
	struct ctdb_traverse_local_handle *t;

	/* never read request fields from a buffer that cannot hold them */
	if (data.dptr == NULL || data.dsize < sizeof(*d)) {
		DEBUG(DEBUG_ERR, ("Bad record size in ctdb_control_traverse_kill\n"));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, d->db_id);
	if (ctdb_db == NULL) {
		return -1;
	}

	for (t = ctdb_db->traverse; t; t = t->next) {
		if (t->client_reqid == d->reqid &&
		    t->srvid == d->srvid) {
			/* the destructor unlinks t from the list, so stop
			 * iterating right away */
			talloc_free(t);
			break;
		}
	}

	return 0;
}
/*
this is called when a client disconnects during a traverse
we need to notify all the nodes taking part in the search that they
should kill their traverse children
*/
static int ctdb_traverse_start_destructor ( struct traverse_start_state * state )
{
struct ctdb_traverse_start r ;
TDB_DATA data ;
2009-05-05 10:33:21 +04:00
DEBUG ( DEBUG_ERR , ( __location__ " Traverse cancelled by client disconnect for database:0x%08x \n " , state - > db_id ) ) ;
2009-05-06 01:32:25 +04:00
r . db_id = state - > db_id ;
r . reqid = state - > reqid ;
r . srvid = state - > srvid ;
data . dptr = ( uint8_t * ) & r ;
data . dsize = sizeof ( r ) ;
ctdb_daemon_send_control ( state - > ctdb , CTDB_BROADCAST_CONNECTED , 0 ,
CTDB_CONTROL_TRAVERSE_KILL ,
0 , CTDB_CTRL_FLAG_NOREPLY , data , NULL , NULL ) ;
return 0 ;
}
2007-05-03 11:12:23 +04:00
/*
  callback which sends records as messages to the client.

  Each record is marshalled with the client's reqid and dispatched to the
  client's srvid. An empty record (no key, no data) is forwarded as well
  and ends the traverse: the state is freed, with its destructor left in
  place only if the traverse timed out, so that a TRAVERSE_KILL is sent
  in that case. Records that cannot be marshalled are silently dropped.
*/
static void traverse_start_callback(void *p, TDB_DATA key, TDB_DATA data)
{
	struct traverse_start_state *state;
	struct ctdb_rec_data_old *d;
	TDB_DATA cdata;

	state = talloc_get_type(p, struct traverse_start_state);

	d = ctdb_marshall_record(state, state->reqid, key, NULL, data);
	if (d == NULL) {
		return;
	}

	cdata.dptr = (uint8_t *)d;
	cdata.dsize = d->length;

	srvid_dispatch(state->ctdb->srv, state->srvid, 0, cdata);

	/* Release the marshalled copy straight away: left parented to
	 * state, one buffer per record would pile up for the whole
	 * duration of the traverse.
	 * NOTE(review): assumes srvid_dispatch() does not keep a reference
	 * to cdata after it returns - confirm against its implementation. */
	talloc_free(d);

	if (key.dsize == 0 && data.dsize == 0) {
		DEBUG(DEBUG_NOTICE, (" Ending traverse on DB %s (id %d), records %d \n ",
				     state->h->ctdb_db->db_name, state->h->reqid,
				     state->num_records));

		if (state->h->timedout) {
			/* timed out, send TRAVERSE_KILL control */
			talloc_free(state);
		} else {
			/* end of traverse */
			talloc_set_destructor(state, NULL);
			talloc_free(state);
		}
	} else {
		state->num_records++;
	}
}
2009-05-06 01:32:25 +04:00
2011-12-03 05:15:30 +04:00
/**
* start a traverse_all - called as a control from a client .
* extended version to take the " withemptyrecords " parameter .
2007-05-03 11:12:23 +04:00
*/
2011-12-03 05:15:30 +04:00
int32_t ctdb_control_traverse_start_ext ( struct ctdb_context * ctdb ,
TDB_DATA data ,
TDB_DATA * outdata ,
uint32_t srcnode ,
uint32_t client_id )
2007-05-03 11:12:23 +04:00
{
2011-12-03 05:15:30 +04:00
struct ctdb_traverse_start_ext * d = ( struct ctdb_traverse_start_ext * ) data . dptr ;
2007-05-03 11:12:23 +04:00
struct traverse_start_state * state ;
struct ctdb_db_context * ctdb_db ;
2015-03-17 06:30:18 +03:00
struct ctdb_client * client = reqid_find ( ctdb - > idr , client_id , struct ctdb_client ) ;
2009-05-06 01:32:25 +04:00
if ( client = = NULL ) {
DEBUG ( DEBUG_ERR , ( __location__ " No client found \n " ) ) ;
return - 1 ;
}
2007-05-03 11:12:23 +04:00
if ( data . dsize ! = sizeof ( * d ) ) {
2008-02-04 12:07:15 +03:00
DEBUG ( DEBUG_ERR , ( " Bad record size in ctdb_control_traverse_start \n " ) ) ;
2007-05-03 11:12:23 +04:00
return - 1 ;
}
ctdb_db = find_ctdb_db ( ctdb , d - > db_id ) ;
if ( ctdb_db = = NULL ) {
return - 1 ;
}
2009-12-07 15:28:11 +03:00
if ( ctdb_db - > unhealthy_reason ) {
if ( ctdb - > tunable . allow_unhealthy_db_read = = 0 ) {
DEBUG ( DEBUG_ERR , ( " db(%s) unhealty in ctdb_control_traverse_start: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
return - 1 ;
}
DEBUG ( DEBUG_WARNING , ( " warn: db(%s) unhealty in ctdb_control_traverse_start: %s \n " ,
ctdb_db - > db_name , ctdb_db - > unhealthy_reason ) ) ;
}
2009-05-06 01:32:25 +04:00
state = talloc ( client , struct traverse_start_state ) ;
2007-05-03 11:12:23 +04:00
if ( state = = NULL ) {
return - 1 ;
}
state - > srcnode = srcnode ;
state - > reqid = d - > reqid ;
state - > srvid = d - > srvid ;
2009-05-06 01:32:25 +04:00
state - > db_id = d - > db_id ;
2007-05-03 11:12:23 +04:00
state - > ctdb = ctdb ;
2011-11-28 02:16:33 +04:00
state - > withemptyrecords = d - > withemptyrecords ;
2013-09-06 08:51:54 +04:00
state - > num_records = 0 ;
2013-07-11 10:00:30 +04:00
2007-05-03 11:12:23 +04:00
state - > h = ctdb_daemon_traverse_all ( ctdb_db , traverse_start_callback , state ) ;
if ( state - > h = = NULL ) {
talloc_free ( state ) ;
return - 1 ;
}
2009-05-06 01:32:25 +04:00
talloc_set_destructor ( state , ctdb_traverse_start_destructor ) ;
2007-05-03 11:12:23 +04:00
return 0 ;
}
2011-12-03 05:15:30 +04:00
/**
 * start a traverse_all - called as a control from a client.
 *
 * The request is converted into the extended form with withemptyrecords
 * set to false and handed to ctdb_control_traverse_start_ext(), whose
 * result is returned. Returns -1 without doing anything if the request
 * buffer is too short to hold a struct ctdb_traverse_start.
 */
int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb,
				    TDB_DATA data,
				    TDB_DATA *outdata,
				    uint32_t srcnode,
				    uint32_t client_id)
{
	struct ctdb_traverse_start *d = (struct ctdb_traverse_start *)data.dptr;
	struct ctdb_traverse_start_ext d2;
	TDB_DATA data2;

	/* never read request fields from a buffer that cannot hold them */
	if (data.dptr == NULL || data.dsize < sizeof(*d)) {
		DEBUG(DEBUG_ERR, ("Bad record size in ctdb_control_traverse_start\n"));
		return -1;
	}

	ZERO_STRUCT(d2);
	d2.db_id = d->db_id;
	d2.reqid = d->reqid;
	d2.srvid = d->srvid;
	d2.withemptyrecords = false;

	data2.dsize = sizeof(d2);
	data2.dptr = (uint8_t *)&d2;

	return ctdb_control_traverse_start_ext(ctdb, data2, outdata, srcnode, client_id);
}