2006-11-28 09:56:10 +03:00
/*
ctdb_call protocol code
Copyright ( C ) Andrew Tridgell 2006
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
2006-12-18 08:01:11 +03:00
/*
see http://wiki.samba.org/index.php/Samba_%26_Clustering for
protocol design and packet details
*/
2006-11-28 09:56:10 +03:00
# include "includes.h"
# include "lib/events/events.h"
2007-01-23 03:38:45 +03:00
# include "lib/tdb/include/tdb.h"
2006-11-28 09:56:10 +03:00
# include "system/network.h"
# include "system/filesys.h"
2007-01-23 03:38:45 +03:00
# include "../include/ctdb_private.h"
2006-11-28 09:56:10 +03:00
2007-04-12 09:46:50 +04:00
/*
find the ctdb_db from a db index
*/
struct ctdb_db_context * find_ctdb_db ( struct ctdb_context * ctdb , uint32_t id )
{
struct ctdb_db_context * ctdb_db ;
for ( ctdb_db = ctdb - > db_list ; ctdb_db ; ctdb_db = ctdb_db - > next ) {
if ( ctdb_db - > db_id = = id ) {
break ;
}
}
return ctdb_db ;
}
2006-11-28 09:56:10 +03:00
/*
local version of ctdb_call
*/
2007-04-19 05:28:01 +04:00
int ctdb_call_local ( struct ctdb_db_context * ctdb_db , struct ctdb_call * call ,
struct ctdb_ltdb_header * header , TDB_DATA * data ,
uint32_t caller )
2006-11-28 09:56:10 +03:00
{
2007-01-25 07:19:16 +03:00
struct ctdb_call_info * c ;
2006-11-28 12:48:34 +03:00
struct ctdb_registered_call * fn ;
2007-04-03 13:41:00 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2007-04-12 09:46:50 +04:00
2007-01-25 07:19:16 +03:00
c = talloc ( ctdb , struct ctdb_call_info ) ;
2006-11-28 12:48:34 +03:00
CTDB_NO_MEMORY ( ctdb , c ) ;
2007-01-25 08:13:17 +03:00
c - > key = call - > key ;
c - > call_data = & call - > call_data ;
2006-12-18 06:05:49 +03:00
c - > record_data . dptr = talloc_memdup ( c , data - > dptr , data - > dsize ) ;
c - > record_data . dsize = data - > dsize ;
2006-11-28 12:48:34 +03:00
CTDB_NO_MEMORY ( ctdb , c - > record_data . dptr ) ;
c - > new_data = NULL ;
c - > reply_data = NULL ;
2007-01-29 14:30:06 +03:00
c - > status = 0 ;
2006-11-28 12:48:34 +03:00
2007-04-03 13:41:00 +04:00
for ( fn = ctdb_db - > calls ; fn ; fn = fn - > next ) {
2007-01-25 08:13:17 +03:00
if ( fn - > id = = call - > call_id ) break ;
2006-11-28 12:48:34 +03:00
}
if ( fn = = NULL ) {
2007-01-25 08:13:17 +03:00
ctdb_set_error ( ctdb , " Unknown call id %u \n " , call - > call_id ) ;
2007-04-07 04:45:00 +04:00
talloc_free ( c ) ;
2006-11-28 12:48:34 +03:00
return - 1 ;
}
if ( fn - > fn ( c ) ! = 0 ) {
2007-01-25 08:13:17 +03:00
ctdb_set_error ( ctdb , " ctdb_call %u failed \n " , call - > call_id ) ;
2007-04-07 04:45:00 +04:00
talloc_free ( c ) ;
2006-11-28 12:48:34 +03:00
return - 1 ;
}
2006-12-18 06:58:40 +03:00
if ( header - > laccessor ! = caller ) {
header - > lacount = 0 ;
}
header - > laccessor = caller ;
header - > lacount + + ;
/* we need to force the record to be written out if this was a remote access,
so that the lacount is updated */
if ( c - > new_data = = NULL & & header - > laccessor ! = ctdb - > vnn ) {
c - > new_data = & c - > record_data ;
}
2006-11-28 12:48:34 +03:00
if ( c - > new_data ) {
2007-04-22 20:19:49 +04:00
/* XXX check that we always have the lock here? */
2007-04-03 13:41:00 +04:00
if ( ctdb_ltdb_store ( ctdb_db , call - > key , header , * c - > new_data ) ! = 0 ) {
2006-11-28 12:48:34 +03:00
ctdb_set_error ( ctdb , " ctdb_call tdb_store failed \n " ) ;
2007-04-07 04:45:00 +04:00
talloc_free ( c ) ;
2006-11-28 12:48:34 +03:00
return - 1 ;
}
}
2007-01-25 08:13:17 +03:00
if ( c - > reply_data ) {
call - > reply_data = * c - > reply_data ;
talloc_steal ( ctdb , call - > reply_data . dptr ) ;
2007-04-18 05:20:24 +04:00
talloc_set_name_const ( call - > reply_data . dptr , __location__ ) ;
2007-01-25 08:13:17 +03:00
} else {
call - > reply_data . dptr = NULL ;
call - > reply_data . dsize = 0 ;
2006-11-28 12:48:34 +03:00
}
2007-01-29 14:30:06 +03:00
call - > status = c - > status ;
2006-11-28 12:48:34 +03:00
talloc_free ( c ) ;
2006-11-29 17:47:42 +03:00
return 0 ;
2006-11-28 09:56:10 +03:00
}
2006-12-18 06:05:49 +03:00
/*
send an error reply
*/
2007-01-23 03:38:45 +03:00
static void ctdb_send_error ( struct ctdb_context * ctdb ,
struct ctdb_req_header * hdr , uint32_t status ,
const char * fmt , . . . ) PRINTF_ATTRIBUTE ( 4 , 5 ) ;
2006-12-18 06:05:49 +03:00
static void ctdb_send_error ( struct ctdb_context * ctdb ,
2006-12-18 06:27:20 +03:00
struct ctdb_req_header * hdr , uint32_t status ,
const char * fmt , . . . )
2006-12-18 06:05:49 +03:00
{
2006-12-18 06:27:20 +03:00
va_list ap ;
struct ctdb_reply_error * r ;
char * msg ;
2007-01-23 03:38:45 +03:00
int msglen , len ;
2006-12-18 06:27:20 +03:00
va_start ( ap , fmt ) ;
msg = talloc_vasprintf ( ctdb , fmt , ap ) ;
if ( msg = = NULL ) {
2006-12-18 08:26:57 +03:00
ctdb_fatal ( ctdb , " Unable to allocate error in ctdb_send_error \n " ) ;
2006-12-18 06:27:20 +03:00
}
va_end ( ap ) ;
2007-01-23 03:38:45 +03:00
msglen = strlen ( msg ) + 1 ;
len = offsetof ( struct ctdb_reply_error , msg ) ;
2007-04-28 12:50:32 +04:00
r = ctdb_transport_allocate ( ctdb , msg , CTDB_REPLY_ERROR , len + msglen ,
struct ctdb_reply_error ) ;
2006-12-18 08:26:57 +03:00
CTDB_NO_MEMORY_FATAL ( ctdb , r ) ;
2007-01-23 03:38:45 +03:00
2006-12-18 06:27:20 +03:00
r - > hdr . destnode = hdr - > srcnode ;
r - > hdr . reqid = hdr - > reqid ;
r - > status = status ;
2007-01-23 03:38:45 +03:00
r - > msglen = msglen ;
memcpy ( & r - > msg [ 0 ] , msg , msglen ) ;
2006-12-18 06:27:20 +03:00
2006-12-18 08:26:57 +03:00
ctdb_queue_packet ( ctdb , & r - > hdr ) ;
2006-12-18 06:27:20 +03:00
2007-04-19 04:37:44 +04:00
talloc_free ( msg ) ;
2006-12-18 06:05:49 +03:00
}
2006-12-18 06:44:06 +03:00
2006-12-18 06:05:49 +03:00
/*
send a redirect reply
*/
static void ctdb_call_send_redirect ( struct ctdb_context * ctdb ,
2007-04-28 20:18:33 +04:00
TDB_DATA key ,
2006-12-18 06:05:49 +03:00
struct ctdb_req_call * c ,
struct ctdb_ltdb_header * header )
{
2007-04-28 20:18:33 +04:00
uint32_t lmaster = ctdb_lmaster ( ctdb , & key ) ;
if ( ctdb - > vnn = = lmaster ) {
c - > hdr . destnode = header - > dmaster ;
2007-05-01 07:25:02 +04:00
} else if ( ( c - > hopcount % CTDB_MAX_REDIRECT_COUNT ) = = 0 ) {
2007-04-28 20:18:33 +04:00
c - > hdr . destnode = lmaster ;
2007-05-01 07:25:02 +04:00
} else {
c - > hdr . destnode = header - > dmaster ;
2007-04-28 20:18:33 +04:00
}
2007-05-01 07:25:02 +04:00
c - > hopcount + + ;
2007-04-28 20:18:33 +04:00
ctdb_queue_packet ( ctdb , & c - > hdr ) ;
2006-12-18 06:05:49 +03:00
}
2007-04-22 16:26:45 +04:00
/*
  send a dmaster reply

  caller must have the chainlock before calling this routine. Caller must be
  the lmaster

  Records 'new_dmaster' as the dmaster in 'header', stores the record
  locally, and then queues a CTDB_REPLY_DMASTER packet carrying the key
  and data to the new dmaster.  Nothing is sent when this node is not
  the lmaster for 'key' or when the local store fails.
*/
static void ctdb_send_dmaster_reply(struct ctdb_db_context *ctdb_db,
				    struct ctdb_ltdb_header *header,
				    TDB_DATA key, TDB_DATA data,
				    uint32_t new_dmaster,
				    uint32_t reqid)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	struct ctdb_reply_dmaster *r;
	int ret, len;
	TALLOC_CTX *tmp_ctx;

	if (ctdb->vnn != ctdb_lmaster(ctdb, &key)) {
		DEBUG(0, (__location__ " Caller is not lmaster! \n "));
		return;
	}

	header->dmaster = new_dmaster;
	ret = ctdb_ltdb_store(ctdb_db, key, header, data);
	if (ret != 0) {
		ctdb_fatal(ctdb, " ctdb_req_dmaster unable to update dmaster ");
		return;
	}

	/* put the packet on a temporary context, allowing us to safely free
	   it below even if ctdb_reply_dmaster() has freed it already */
	tmp_ctx = talloc_new(ctdb);
	/* without this check a failed talloc_new() would leave the packet
	   without a parent context and the free below would not release it */
	CTDB_NO_MEMORY_FATAL(ctdb, tmp_ctx);

	/* send the CTDB_REPLY_DMASTER */
	len = offsetof(struct ctdb_reply_dmaster, data) + key.dsize + data.dsize;
	r = ctdb_transport_allocate(ctdb, tmp_ctx, CTDB_REPLY_DMASTER, len,
				    struct ctdb_reply_dmaster);
	CTDB_NO_MEMORY_FATAL(ctdb, r);

	r->hdr.destnode = new_dmaster;
	r->hdr.reqid    = reqid;
	r->rsn          = header->rsn;
	r->keylen       = key.dsize;
	r->datalen      = data.dsize;
	r->db_id        = ctdb_db->db_id;
	/* payload layout: key bytes immediately followed by data bytes */
	memcpy(&r->data[0], key.dptr, key.dsize);
	memcpy(&r->data[key.dsize], data.dptr, data.dsize);

	ctdb_queue_packet(ctdb, &r->hdr);

	talloc_free(tmp_ctx);
}
2006-12-18 08:01:11 +03:00
/*
send a dmaster request ( give another node the dmaster for a record )
This is always sent to the lmaster , which ensures that the lmaster
always knows who the dmaster is . The lmaster will then send a
CTDB_REPLY_DMASTER to the new dmaster
*/
2007-04-03 13:41:00 +04:00
static void ctdb_call_send_dmaster ( struct ctdb_db_context * ctdb_db ,
2006-12-18 08:01:11 +03:00
struct ctdb_req_call * c ,
struct ctdb_ltdb_header * header ,
TDB_DATA * key , TDB_DATA * data )
{
struct ctdb_req_dmaster * r ;
2007-04-03 13:41:00 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2006-12-18 08:01:11 +03:00
int len ;
2007-04-22 16:26:45 +04:00
uint32_t lmaster = ctdb_lmaster ( ctdb , key ) ;
if ( lmaster = = ctdb - > vnn ) {
ctdb_send_dmaster_reply ( ctdb_db , header , * key , * data ,
c - > hdr . srcnode , c - > hdr . reqid ) ;
return ;
}
2006-12-18 08:01:11 +03:00
2007-01-23 03:38:45 +03:00
len = offsetof ( struct ctdb_req_dmaster , data ) + key - > dsize + data - > dsize ;
2007-04-28 12:50:32 +04:00
r = ctdb_transport_allocate ( ctdb , ctdb , CTDB_REQ_DMASTER , len ,
struct ctdb_req_dmaster ) ;
2006-12-18 08:26:57 +03:00
CTDB_NO_MEMORY_FATAL ( ctdb , r ) ;
2007-04-22 16:26:45 +04:00
r - > hdr . destnode = lmaster ;
2006-12-18 08:01:11 +03:00
r - > hdr . reqid = c - > hdr . reqid ;
2007-04-03 13:41:00 +04:00
r - > db_id = c - > db_id ;
2007-04-29 18:19:40 +04:00
r - > rsn = header - > rsn ;
2007-04-05 07:18:31 +04:00
r - > dmaster = c - > hdr . srcnode ;
2006-12-18 08:01:11 +03:00
r - > keylen = key - > dsize ;
r - > datalen = data - > dsize ;
memcpy ( & r - > data [ 0 ] , key - > dptr , key - > dsize ) ;
memcpy ( & r - > data [ key - > dsize ] , data - > dptr , data - > dsize ) ;
2007-04-22 16:26:45 +04:00
header - > dmaster = c - > hdr . srcnode ;
2007-04-20 11:58:37 +04:00
ctdb_ltdb_store ( ctdb_db , * key , header , * data ) ;
ctdb_queue_packet ( ctdb , & r - > hdr ) ;
2006-12-18 08:01:11 +03:00
talloc_free ( r ) ;
}
2007-04-22 20:19:49 +04:00
/*
called when a CTDB_REPLY_DMASTER packet comes in , or when the lmaster
gets a CTDB_REQUEST_DMASTER for itself . We become the dmaster .
must be called with the chainlock held . This function releases the chainlock
*/
2007-04-29 18:19:40 +04:00
static void ctdb_become_dmaster ( struct ctdb_db_context * ctdb_db ,
uint32_t reqid , TDB_DATA key , TDB_DATA data ,
uint64_t rsn )
2007-04-22 20:19:49 +04:00
{
struct ctdb_call_state * state ;
2007-04-29 18:19:40 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
struct ctdb_ltdb_header header ;
DEBUG ( 2 , ( " vnn %u dmaster response %08x \n " , ctdb - > vnn , ctdb_hash ( & key ) ) ) ;
ZERO_STRUCT ( header ) ;
header . rsn = rsn ;
header . dmaster = ctdb - > vnn ;
if ( ctdb_ltdb_store ( ctdb_db , key , & header , data ) ! = 0 ) {
ctdb_fatal ( ctdb , " ctdb_reply_dmaster store failed \n " ) ;
ctdb_ltdb_unlock ( ctdb_db , key ) ;
return ;
}
2007-04-22 20:19:49 +04:00
2007-04-23 12:19:50 +04:00
state = ctdb_reqid_find ( ctdb , reqid , struct ctdb_call_state ) ;
2007-04-22 20:19:49 +04:00
if ( state = = NULL ) {
2007-04-29 18:19:40 +04:00
DEBUG ( 0 , ( " vnn %u Invalid reqid %u in ctdb_become_dmaster \n " ,
ctdb - > vnn , reqid ) ) ;
ctdb_ltdb_unlock ( ctdb_db , key ) ;
2007-04-22 20:19:49 +04:00
return ;
}
2007-04-23 12:19:50 +04:00
if ( reqid ! = state - > reqid ) {
/* we found a record but it was the wrong one */
2007-04-29 18:19:40 +04:00
DEBUG ( 0 , ( " Dropped orphan in ctdb_become_dmaster with reqid:%d \n " , reqid ) ) ;
ctdb_ltdb_unlock ( ctdb_db , key ) ;
2007-04-22 20:19:49 +04:00
return ;
}
2007-04-29 18:19:40 +04:00
ctdb_call_local ( ctdb_db , & state - > call , & header , & data , ctdb - > vnn ) ;
2007-04-22 20:19:49 +04:00
ctdb_ltdb_unlock ( ctdb_db , state - > call . key ) ;
talloc_steal ( state , state - > call . reply_data . dptr ) ;
state - > state = CTDB_CALL_DONE ;
if ( state - > async . fn ) {
state - > async . fn ( state ) ;
}
}
2006-12-18 08:01:11 +03:00
/*
called when a CTDB_REQ_DMASTER packet comes in
this comes into the lmaster for a record when the current dmaster
wants to give up the dmaster role and give it to someone else
*/
void ctdb_request_dmaster ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct ctdb_req_dmaster * c = ( struct ctdb_req_dmaster * ) hdr ;
2007-01-23 03:38:45 +03:00
TDB_DATA key , data , data2 ;
2006-12-18 08:01:11 +03:00
struct ctdb_ltdb_header header ;
2007-04-03 13:41:00 +04:00
struct ctdb_db_context * ctdb_db ;
2007-04-22 16:26:45 +04:00
int ret ;
2006-12-18 08:01:11 +03:00
key . dptr = c - > data ;
key . dsize = c - > keylen ;
data . dptr = c - > data + c - > keylen ;
data . dsize = c - > datalen ;
2007-04-12 09:46:50 +04:00
ctdb_db = find_ctdb_db ( ctdb , c - > db_id ) ;
2007-04-03 13:41:00 +04:00
if ( ! ctdb_db ) {
2007-04-11 14:49:10 +04:00
ctdb_send_error ( ctdb , hdr , - 1 ,
" Unknown database in request. db_id==0x%08x " ,
c - > db_id ) ;
2007-04-03 13:41:00 +04:00
return ;
}
2007-04-05 07:18:31 +04:00
2007-04-19 10:27:56 +04:00
/* fetch the current record */
ret = ctdb_ltdb_lock_fetch_requeue ( ctdb_db , key , & header , hdr , & data2 ,
ctdb_recv_raw_pkt , ctdb ) ;
if ( ret = = - 1 ) {
ctdb_fatal ( ctdb , " ctdb_req_dmaster failed to fetch record " ) ;
return ;
}
if ( ret = = - 2 ) {
DEBUG ( 2 , ( __location__ " deferring ctdb_request_dmaster \n " ) ) ;
return ;
}
2007-04-22 20:19:49 +04:00
if ( ctdb_lmaster ( ctdb , & key ) ! = ctdb - > vnn ) {
DEBUG ( 0 , ( " vnn %u dmaster request to non-lmaster lmaster=%u \n " ,
ctdb - > vnn , ctdb_lmaster ( ctdb , & key ) ) ) ;
ctdb_fatal ( ctdb , " ctdb_req_dmaster to non-lmaster " ) ;
}
DEBUG ( 2 , ( " vnn %u dmaster request on %08x for %u from %u \n " ,
ctdb - > vnn , ctdb_hash ( & key ) , c - > dmaster , c - > hdr . srcnode ) ) ;
2007-04-19 10:27:56 +04:00
/* its a protocol error if the sending node is not the current dmaster */
2007-04-22 16:26:45 +04:00
if ( header . dmaster ! = hdr - > srcnode ) {
2007-04-22 20:19:49 +04:00
DEBUG ( 0 , ( " vnn %u dmaster request non-master %u dmaster=%u key %08x \n " ,
ctdb - > vnn , hdr - > srcnode , header . dmaster , ctdb_hash ( & key ) ) ) ;
2007-04-22 16:26:45 +04:00
ctdb_fatal ( ctdb , " ctdb_req_dmaster from non-master " ) ;
2007-04-19 10:27:56 +04:00
return ;
2006-12-18 08:01:11 +03:00
}
2007-04-22 20:19:49 +04:00
/* check if the new dmaster is the lmaster, in which case we
skip the dmaster reply */
if ( c - > dmaster = = ctdb - > vnn ) {
2007-04-29 18:19:40 +04:00
ctdb_become_dmaster ( ctdb_db , hdr - > reqid , key , data , c - > rsn ) ;
2007-04-22 20:19:49 +04:00
} else {
ctdb_send_dmaster_reply ( ctdb_db , & header , key , data , c - > dmaster , hdr - > reqid ) ;
ctdb_ltdb_unlock ( ctdb_db , key ) ;
}
2006-12-18 08:01:11 +03:00
}
2006-11-28 09:56:10 +03:00
/*
2006-12-01 07:45:24 +03:00
called when a CTDB_REQ_CALL packet comes in
2006-11-28 09:56:10 +03:00
*/
2006-12-01 07:45:24 +03:00
void ctdb_request_call ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
2006-11-28 09:56:10 +03:00
{
2006-12-01 07:45:24 +03:00
struct ctdb_req_call * c = ( struct ctdb_req_call * ) hdr ;
2007-01-25 08:13:17 +03:00
TDB_DATA data ;
2006-12-01 07:45:24 +03:00
struct ctdb_reply_call * r ;
2007-01-23 03:38:45 +03:00
int ret , len ;
2006-12-18 06:05:49 +03:00
struct ctdb_ltdb_header header ;
2007-01-25 08:13:17 +03:00
struct ctdb_call call ;
2007-04-03 13:41:00 +04:00
struct ctdb_db_context * ctdb_db ;
2007-04-12 09:46:50 +04:00
ctdb_db = find_ctdb_db ( ctdb , c - > db_id ) ;
2007-04-03 13:41:00 +04:00
if ( ! ctdb_db ) {
2007-04-11 14:49:10 +04:00
ctdb_send_error ( ctdb , hdr , - 1 ,
" Unknown database in request. db_id==0x%08x " ,
c - > db_id ) ;
2007-04-03 13:41:00 +04:00
return ;
}
2006-12-01 07:45:24 +03:00
2007-01-25 08:13:17 +03:00
call . call_id = c - > callid ;
call . key . dptr = c - > data ;
call . key . dsize = c - > keylen ;
call . call_data . dptr = c - > data + c - > keylen ;
call . call_data . dsize = c - > calldatalen ;
2006-12-01 07:45:24 +03:00
2006-12-18 06:05:49 +03:00
/* determine if we are the dmaster for this key. This also
fetches the record data ( if any ) , thus avoiding a 2 nd fetch of the data
if the call will be answered locally */
2007-01-29 14:11:16 +03:00
2007-04-19 10:27:56 +04:00
ret = ctdb_ltdb_lock_fetch_requeue ( ctdb_db , call . key , & header , hdr , & data ,
ctdb_recv_raw_pkt , ctdb ) ;
2007-04-17 10:54:03 +04:00
if ( ret = = - 1 ) {
2006-12-18 06:27:20 +03:00
ctdb_send_error ( ctdb , hdr , ret , " ltdb fetch failed in ctdb_request_call " ) ;
2006-12-18 06:05:49 +03:00
return ;
}
2007-04-17 10:54:03 +04:00
if ( ret = = - 2 ) {
2007-04-17 16:13:06 +04:00
DEBUG ( 2 , ( __location__ " deferred ctdb_request_call \n " ) ) ;
2007-04-17 10:54:03 +04:00
return ;
}
2006-12-18 06:05:49 +03:00
/* if we are not the dmaster, then send a redirect to the
requesting node */
if ( header . dmaster ! = ctdb - > vnn ) {
talloc_free ( data . dptr ) ;
2007-04-28 20:55:37 +04:00
ctdb_call_send_redirect ( ctdb , call . key , c , & header ) ;
2007-04-17 10:54:03 +04:00
ctdb_ltdb_unlock ( ctdb_db , call . key ) ;
2006-12-18 06:05:49 +03:00
return ;
}
2007-05-01 07:25:02 +04:00
if ( c - > hopcount > ctdb - > status . max_hop_count ) {
ctdb - > status . max_hop_count = c - > hopcount ;
}
2006-12-18 08:01:11 +03:00
/* if this nodes has done enough consecutive calls on the same record
2007-04-04 15:15:56 +04:00
then give them the record
or if the node requested an immediate migration
*/
2007-04-21 11:22:46 +04:00
if ( c - > hdr . srcnode ! = ctdb - > vnn & &
( ( header . laccessor = = c - > hdr . srcnode
& & header . lacount > = ctdb - > max_lacount )
| | ( c - > flags & CTDB_IMMEDIATE_MIGRATION ) ) ) {
2007-04-22 20:19:49 +04:00
DEBUG ( 2 , ( " vnn %u starting migration of %08x to %u \n " ,
ctdb - > vnn , ctdb_hash ( & call . key ) , c - > hdr . srcnode ) ) ;
2007-04-03 13:41:00 +04:00
ctdb_call_send_dmaster ( ctdb_db , c , & header , & call . key , & data ) ;
2006-12-18 08:01:11 +03:00
talloc_free ( data . dptr ) ;
2007-04-17 10:54:03 +04:00
ctdb_ltdb_unlock ( ctdb_db , call . key ) ;
2006-12-18 08:01:11 +03:00
return ;
}
2007-04-03 13:41:00 +04:00
ctdb_call_local ( ctdb_db , & call , & header , & data , c - > hdr . srcnode ) ;
2006-12-01 07:45:24 +03:00
2007-04-17 10:54:03 +04:00
ctdb_ltdb_unlock ( ctdb_db , call . key ) ;
2007-01-25 08:13:17 +03:00
len = offsetof ( struct ctdb_reply_call , data ) + call . reply_data . dsize ;
2007-04-28 12:50:32 +04:00
r = ctdb_transport_allocate ( ctdb , ctdb , CTDB_REPLY_CALL , len ,
struct ctdb_reply_call ) ;
2006-12-18 08:26:57 +03:00
CTDB_NO_MEMORY_FATAL ( ctdb , r ) ;
2006-12-01 07:45:24 +03:00
r - > hdr . destnode = hdr - > srcnode ;
r - > hdr . reqid = hdr - > reqid ;
2007-01-29 14:30:06 +03:00
r - > status = call . status ;
2007-01-25 08:13:17 +03:00
r - > datalen = call . reply_data . dsize ;
2007-01-29 14:30:06 +03:00
if ( call . reply_data . dsize ) {
memcpy ( & r - > data [ 0 ] , call . reply_data . dptr , call . reply_data . dsize ) ;
talloc_free ( call . reply_data . dptr ) ;
}
2006-12-01 07:45:24 +03:00
2006-12-18 08:26:57 +03:00
ctdb_queue_packet ( ctdb , & r - > hdr ) ;
2006-12-01 07:45:24 +03:00
talloc_free ( r ) ;
}
/*
called when a CTDB_REPLY_CALL packet comes in
2006-12-18 06:49:32 +03:00
This packet comes in response to a CTDB_REQ_CALL request packet . It
2007-04-17 23:23:22 +04:00
contains any reply data from the call
2006-12-01 07:45:24 +03:00
*/
void ctdb_reply_call ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct ctdb_reply_call * c = ( struct ctdb_reply_call * ) hdr ;
struct ctdb_call_state * state ;
2007-04-23 12:19:50 +04:00
state = ctdb_reqid_find ( ctdb , hdr - > reqid , struct ctdb_call_state ) ;
2007-04-18 01:03:30 +04:00
if ( state = = NULL ) {
2007-04-21 11:22:46 +04:00
DEBUG ( 0 , ( __location__ " reqid %d not found \n " , hdr - > reqid ) ) ;
2007-04-18 01:03:30 +04:00
return ;
}
2006-12-01 07:45:24 +03:00
2007-04-23 12:19:50 +04:00
if ( hdr - > reqid ! = state - > reqid ) {
/* we found a record but it was the wrong one */
2007-04-29 18:19:40 +04:00
DEBUG ( 0 , ( " Dropped orphaned call reply with reqid:%d \n " , hdr - > reqid ) ) ;
2007-04-23 12:19:50 +04:00
return ;
}
2007-01-29 14:30:06 +03:00
state - > call . reply_data . dptr = c - > data ;
state - > call . reply_data . dsize = c - > datalen ;
state - > call . status = c - > status ;
2006-12-01 07:45:24 +03:00
talloc_steal ( state , c ) ;
state - > state = CTDB_CALL_DONE ;
2007-04-12 09:46:50 +04:00
if ( state - > async . fn ) {
state - > async . fn ( state ) ;
}
2006-12-01 07:45:24 +03:00
}
2007-04-22 20:19:49 +04:00
2006-12-18 08:01:11 +03:00
/*
called when a CTDB_REPLY_DMASTER packet comes in
This packet comes in from the lmaster response to a CTDB_REQ_CALL
request packet . It means that the current dmaster wants to give us
the dmaster role
*/
void ctdb_reply_dmaster ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct ctdb_reply_dmaster * c = ( struct ctdb_reply_dmaster * ) hdr ;
2007-04-03 13:41:00 +04:00
struct ctdb_db_context * ctdb_db ;
2007-04-29 18:19:40 +04:00
TDB_DATA key , data ;
2007-04-19 11:43:27 +04:00
int ret ;
2006-12-18 08:01:11 +03:00
2007-04-29 18:19:40 +04:00
ctdb_db = find_ctdb_db ( ctdb , c - > db_id ) ;
if ( ctdb_db = = NULL ) {
DEBUG ( 0 , ( " Unknown db_id 0x%x in ctdb_reply_dmaster \n " , c - > db_id ) ) ;
2007-04-23 12:19:50 +04:00
return ;
}
2007-04-29 18:19:40 +04:00
key . dptr = c - > data ;
key . dsize = c - > keylen ;
data . dptr = & c - > data [ key . dsize ] ;
data . dsize = c - > datalen ;
2007-04-23 12:19:50 +04:00
2007-04-29 18:19:40 +04:00
ret = ctdb_ltdb_lock_requeue ( ctdb_db , key , hdr ,
2007-04-19 11:43:27 +04:00
ctdb_recv_raw_pkt , ctdb ) ;
if ( ret = = - 2 ) {
return ;
}
if ( ret ! = 0 ) {
DEBUG ( 0 , ( __location__ " Failed to get lock in ctdb_reply_dmaster \n " ) ) ;
return ;
}
2007-04-29 18:19:40 +04:00
ctdb_become_dmaster ( ctdb_db , hdr - > reqid , key , data , c - > rsn ) ;
2006-12-18 08:01:11 +03:00
}
2006-12-18 06:27:20 +03:00
/*
called when a CTDB_REPLY_ERROR packet comes in
*/
void ctdb_reply_error ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct ctdb_reply_error * c = ( struct ctdb_reply_error * ) hdr ;
struct ctdb_call_state * state ;
2007-04-23 12:19:50 +04:00
state = ctdb_reqid_find ( ctdb , hdr - > reqid , struct ctdb_call_state ) ;
if ( state = = NULL ) {
2007-04-29 18:19:40 +04:00
DEBUG ( 0 , ( " vnn %u Invalid reqid %u in ctdb_reply_error \n " ,
ctdb - > vnn , hdr - > reqid ) ) ;
2007-04-23 12:19:50 +04:00
return ;
}
if ( hdr - > reqid ! = state - > reqid ) {
/* we found a record but it was the wrong one */
2007-04-29 18:19:40 +04:00
DEBUG ( 0 , ( " Dropped orphaned error reply with reqid:%d \n " , hdr - > reqid ) ) ;
2007-04-23 12:19:50 +04:00
return ;
}
2006-12-18 06:27:20 +03:00
talloc_steal ( state , c ) ;
state - > state = CTDB_CALL_ERROR ;
state - > errmsg = ( char * ) c - > msg ;
2007-04-12 09:46:50 +04:00
if ( state - > async . fn ) {
state - > async . fn ( state ) ;
}
2006-12-18 06:27:20 +03:00
}
2006-12-18 06:44:06 +03:00
2006-12-01 07:45:24 +03:00
/*
destroy a ctdb_call
*/
static int ctdb_call_destructor ( struct ctdb_call_state * state )
{
2007-04-28 20:55:37 +04:00
ctdb_reqid_remove ( state - > ctdb_db - > ctdb , state - > reqid ) ;
2006-12-01 07:45:24 +03:00
return 0 ;
}
2006-12-18 06:49:32 +03:00
2006-12-01 07:45:24 +03:00
/*
2006-12-18 06:49:32 +03:00
called when a ctdb_call times out
2006-12-01 07:45:24 +03:00
*/
2007-05-10 08:06:48 +04:00
static void ctdb_call_timeout ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
2006-12-01 07:45:24 +03:00
{
2007-04-13 14:38:24 +04:00
struct ctdb_call_state * state = talloc_get_type ( private_data , struct ctdb_call_state ) ;
2007-05-10 08:06:48 +04:00
struct ctdb_context * ctdb = state - > ctdb_db - > ctdb ;
ctdb - > status . timeouts . call + + ;
event_add_timed ( ctdb - > ev , state , timeval_current_ofs ( CTDB_CALL_TIMEOUT , 0 ) ,
ctdb_call_timeout , state ) ;
2007-05-10 11:43:45 +04:00
if ( + + state - > resend_count < 10 & &
( ctdb - > vnn_map - > generation = = state - > generation | |
ctdb - > recovery_mode ! = CTDB_RECOVERY_NORMAL ) ) {
2007-05-10 08:06:48 +04:00
/* the call is just being slow, or we are curently
recovering , give it more time */
return ;
2007-04-12 09:46:50 +04:00
}
2007-05-10 08:06:48 +04:00
2007-05-10 11:43:45 +04:00
/* the generation count changed or we're timing out too much -
the call must be re - issued */
2007-05-10 08:06:48 +04:00
state - > generation = ctdb - > vnn_map - > generation ;
2007-05-10 11:43:45 +04:00
state - > resend_count = 0 ;
2007-05-10 08:06:48 +04:00
/* use a new reqid, in case the old reply does eventually come in */
ctdb_reqid_remove ( ctdb , state - > reqid ) ;
state - > reqid = ctdb_reqid_new ( ctdb , state ) ;
state - > c - > hdr . reqid = state - > reqid ;
2007-05-10 11:43:45 +04:00
/* update the generation count for this request, so its valid with the new vnn_map */
state - > c - > hdr . generation = state - > generation ;
2007-05-10 08:06:48 +04:00
/* send the packet to ourselves, it will be redirected appropriately */
state - > c - > hdr . destnode = ctdb - > vnn ;
ctdb_queue_packet ( ctdb , & state - > c - > hdr ) ;
2006-12-01 07:45:24 +03:00
}
2007-04-12 09:46:50 +04:00
/*
this allows the caller to setup a async . fn
*/
static void call_local_trigger ( struct event_context * ev , struct timed_event * te ,
2007-04-13 14:38:24 +04:00
struct timeval t , void * private_data )
2007-04-12 09:46:50 +04:00
{
2007-04-13 14:38:24 +04:00
struct ctdb_call_state * state = talloc_get_type ( private_data , struct ctdb_call_state ) ;
2007-04-12 09:46:50 +04:00
if ( state - > async . fn ) {
state - > async . fn ( state ) ;
}
}
2006-12-01 07:45:24 +03:00
/*
2006-12-18 06:49:32 +03:00
construct an event driven local ctdb_call
this is used so that locally processed ctdb_call requests are processed
in an event driven manner
2006-12-01 07:45:24 +03:00
*/
2007-04-03 13:41:00 +04:00
struct ctdb_call_state * ctdb_call_local_send ( struct ctdb_db_context * ctdb_db ,
2007-01-25 08:13:17 +03:00
struct ctdb_call * call ,
2006-12-18 06:05:49 +03:00
struct ctdb_ltdb_header * header ,
TDB_DATA * data )
2006-12-01 07:45:24 +03:00
{
struct ctdb_call_state * state ;
2007-04-03 13:41:00 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2006-12-01 07:45:24 +03:00
int ret ;
2007-04-03 13:41:00 +04:00
state = talloc_zero ( ctdb_db , struct ctdb_call_state ) ;
2006-12-01 12:26:21 +03:00
CTDB_NO_MEMORY_NULL ( ctdb , state ) ;
2006-12-01 07:45:24 +03:00
2007-04-07 04:58:14 +04:00
talloc_steal ( state , data - > dptr ) ;
2006-12-01 07:45:24 +03:00
state - > state = CTDB_CALL_DONE ;
2007-01-29 14:11:16 +03:00
state - > call = * call ;
2007-04-03 13:41:00 +04:00
state - > ctdb_db = ctdb_db ;
2007-01-29 14:11:16 +03:00
2007-04-03 13:41:00 +04:00
ret = ctdb_call_local ( ctdb_db , & state - > call , header , data , ctdb - > vnn ) ;
2007-04-18 05:20:24 +04:00
talloc_steal ( state , state - > call . reply_data . dptr ) ;
2006-12-01 07:45:24 +03:00
2007-04-12 09:46:50 +04:00
event_add_timed ( ctdb - > ev , state , timeval_zero ( ) , call_local_trigger , state ) ;
2006-12-01 07:45:24 +03:00
return state ;
}
/*
2007-04-11 05:01:42 +04:00
make a remote ctdb call - async send . Called in daemon context .
2006-12-18 06:49:32 +03:00
This constructs a ctdb_call request and queues it for processing .
This call never blocks .
2006-12-01 07:45:24 +03:00
*/
2007-04-17 10:20:32 +04:00
struct ctdb_call_state * ctdb_daemon_call_send_remote ( struct ctdb_db_context * ctdb_db ,
struct ctdb_call * call ,
struct ctdb_ltdb_header * header )
2006-12-01 07:45:24 +03:00
{
uint32_t len ;
struct ctdb_call_state * state ;
2007-04-03 13:41:00 +04:00
struct ctdb_context * ctdb = ctdb_db - > ctdb ;
2006-11-28 09:56:10 +03:00
2007-04-03 13:41:00 +04:00
state = talloc_zero ( ctdb_db , struct ctdb_call_state ) ;
2006-12-01 12:26:21 +03:00
CTDB_NO_MEMORY_NULL ( ctdb , state ) ;
2007-04-23 12:19:50 +04:00
state - > reqid = ctdb_reqid_new ( ctdb , state ) ;
2007-05-10 11:43:45 +04:00
state - > ctdb_db = ctdb_db ;
2007-04-23 12:19:50 +04:00
talloc_set_destructor ( state , ctdb_call_destructor ) ;
2006-11-28 12:48:34 +03:00
2007-01-25 08:13:17 +03:00
len = offsetof ( struct ctdb_req_call , data ) + call - > key . dsize + call - > call_data . dsize ;
2007-04-28 12:50:32 +04:00
state - > c = ctdb_transport_allocate ( ctdb , state , CTDB_REQ_CALL , len ,
struct ctdb_req_call ) ;
2006-12-01 12:26:21 +03:00
CTDB_NO_MEMORY_NULL ( ctdb , state - > c ) ;
2007-04-17 10:20:32 +04:00
state - > c - > hdr . destnode = header - > dmaster ;
2007-04-28 19:13:30 +04:00
2006-11-28 12:48:34 +03:00
/* this limits us to 16k outstanding messages - not unreasonable */
2007-04-23 12:19:50 +04:00
state - > c - > hdr . reqid = state - > reqid ;
2007-04-04 15:15:56 +04:00
state - > c - > flags = call - > flags ;
2007-04-03 13:41:00 +04:00
state - > c - > db_id = ctdb_db - > db_id ;
2007-01-25 08:13:17 +03:00
state - > c - > callid = call - > call_id ;
2007-05-01 07:25:02 +04:00
state - > c - > hopcount = 0 ;
2007-01-25 08:13:17 +03:00
state - > c - > keylen = call - > key . dsize ;
state - > c - > calldatalen = call - > call_data . dsize ;
memcpy ( & state - > c - > data [ 0 ] , call - > key . dptr , call - > key . dsize ) ;
memcpy ( & state - > c - > data [ call - > key . dsize ] ,
call - > call_data . dptr , call - > call_data . dsize ) ;
2007-01-29 14:11:16 +03:00
state - > call = * call ;
2007-01-25 08:13:17 +03:00
state - > call . call_data . dptr = & state - > c - > data [ call - > key . dsize ] ;
2007-01-29 14:11:16 +03:00
state - > call . key . dptr = & state - > c - > data [ 0 ] ;
2006-11-28 12:48:34 +03:00
2006-12-18 08:01:11 +03:00
state - > state = CTDB_CALL_WAIT ;
2007-05-10 08:06:48 +04:00
state - > generation = ctdb - > vnn_map - > generation ;
2006-11-28 12:48:34 +03:00
2006-12-18 08:26:57 +03:00
ctdb_queue_packet ( ctdb , & state - > c - > hdr ) ;
2006-11-28 12:48:34 +03:00
2007-05-10 08:06:48 +04:00
event_add_timed ( ctdb - > ev , state , timeval_current_ofs ( CTDB_CALL_TIMEOUT , 0 ) ,
2006-12-01 07:45:24 +03:00
ctdb_call_timeout , state ) ;
return state ;
}
/*
2007-04-11 05:01:42 +04:00
make a remote ctdb call - async recv - called in daemon context
2006-12-18 06:49:32 +03:00
This is called when the program wants to wait for a ctdb_call to complete and get the
results . This call will block unless the call has already completed .
2006-12-01 07:45:24 +03:00
*/
2007-04-17 08:52:51 +04:00
int ctdb_daemon_call_recv ( struct ctdb_call_state * state , struct ctdb_call * call )
2006-12-01 07:45:24 +03:00
{
2007-04-05 07:18:31 +04:00
while ( state - > state < CTDB_CALL_DONE ) {
2007-04-28 20:55:37 +04:00
event_loop_once ( state - > ctdb_db - > ctdb - > ev ) ;
2007-04-05 07:18:31 +04:00
}
if ( state - > state ! = CTDB_CALL_DONE ) {
2007-04-28 20:55:37 +04:00
ctdb_set_error ( state - > ctdb_db - > ctdb , " %s " , state - > errmsg ) ;
2007-04-05 07:18:31 +04:00
talloc_free ( state ) ;
return - 1 ;
}
2007-01-29 14:30:06 +03:00
if ( state - > call . reply_data . dsize ) {
2007-04-28 20:55:37 +04:00
call - > reply_data . dptr = talloc_memdup ( state - > ctdb_db - > ctdb ,
2007-01-29 14:30:06 +03:00
state - > call . reply_data . dptr ,
state - > call . reply_data . dsize ) ;
call - > reply_data . dsize = state - > call . reply_data . dsize ;
} else {
call - > reply_data . dptr = NULL ;
call - > reply_data . dsize = 0 ;
}
call - > status = state - > call . status ;
2006-12-01 07:45:24 +03:00
talloc_free ( state ) ;
return 0 ;
2006-11-28 09:56:10 +03:00
}
2007-04-11 05:01:42 +04:00