2006-11-27 13:38:13 +03:00
/*
2006-11-28 09:56:10 +03:00
ctdb main protocol code
2006-11-27 13:38:13 +03:00
Copyright ( C ) Andrew Tridgell 2006
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
# include "includes.h"
2007-01-23 03:38:45 +03:00
# include "lib/tdb/include/tdb.h"
2006-11-27 13:38:13 +03:00
# include "lib/events/events.h"
2007-01-23 03:38:45 +03:00
# include "lib/util/dlinklist.h"
2006-11-27 13:38:13 +03:00
# include "system/network.h"
# include "system/filesys.h"
2007-01-23 03:38:45 +03:00
# include "../include/ctdb_private.h"
2006-11-27 13:38:13 +03:00
/*
choose the transport we will use
*/
int ctdb_set_transport ( struct ctdb_context * ctdb , const char * transport )
{
2007-04-20 16:26:19 +04:00
ctdb - > transport = talloc_strdup ( ctdb , transport ) ;
return 0 ;
2006-11-27 13:38:13 +03:00
}
2007-04-30 00:42:23 +04:00
/*
choose the logfile location
*/
int ctdb_set_logfile ( struct ctdb_context * ctdb , const char * logfile )
{
ctdb - > logfile = talloc_strdup ( ctdb , logfile ) ;
if ( ctdb - > logfile ! = NULL ) {
int fd ;
close ( 1 ) ;
fd = open ( ctdb - > logfile , O_WRONLY | O_APPEND | O_CREAT , 0666 ) ;
if ( fd = = - 1 ) {
abort ( ) ;
}
if ( fd ! = 1 ) {
dup2 ( fd , 1 ) ;
close ( fd ) ;
}
}
return 0 ;
}
2006-11-27 13:38:13 +03:00
2006-12-01 07:45:24 +03:00
/*
set some ctdb flags
*/
void ctdb_set_flags ( struct ctdb_context * ctdb , unsigned flags )
{
ctdb - > flags | = flags ;
}
2007-04-10 00:03:39 +04:00
/*
clear some ctdb flags
*/
void ctdb_clear_flags ( struct ctdb_context * ctdb , unsigned flags )
{
ctdb - > flags & = ~ flags ;
}
2007-01-23 03:38:45 +03:00
/*
set max acess count before a dmaster migration
*/
void ctdb_set_max_lacount ( struct ctdb_context * ctdb , unsigned count )
{
ctdb - > max_lacount = count ;
}
2006-11-27 13:38:13 +03:00
2007-04-18 18:36:22 +04:00
/*
set the directory for the local databases
*/
2007-04-19 03:14:25 +04:00
int ctdb_set_tdb_dir ( struct ctdb_context * ctdb , const char * dir )
2007-04-18 18:36:22 +04:00
{
2007-04-19 04:03:20 +04:00
if ( dir = = NULL ) {
ctdb - > db_directory = talloc_asprintf ( ctdb , " ctdb-%u " , ctdb_get_vnn ( ctdb ) ) ;
} else {
ctdb - > db_directory = talloc_strdup ( ctdb , dir ) ;
}
2007-04-19 03:14:25 +04:00
if ( ctdb - > db_directory = = NULL ) {
return - 1 ;
}
return 0 ;
2007-04-18 18:36:22 +04:00
}
2006-11-27 13:38:13 +03:00
/*
add a node to the list of active nodes
*/
static int ctdb_add_node ( struct ctdb_context * ctdb , char * nstr )
{
2006-11-28 09:56:10 +03:00
struct ctdb_node * node , * * nodep ;
nodep = talloc_realloc ( ctdb , ctdb - > nodes , struct ctdb_node * , ctdb - > num_nodes + 1 ) ;
CTDB_NO_MEMORY ( ctdb , nodep ) ;
ctdb - > nodes = nodep ;
nodep = & ctdb - > nodes [ ctdb - > num_nodes ] ;
( * nodep ) = talloc_zero ( ctdb - > nodes , struct ctdb_node ) ;
CTDB_NO_MEMORY ( ctdb , * nodep ) ;
node = * nodep ;
2006-11-27 13:38:13 +03:00
if ( ctdb_parse_address ( ctdb , node , nstr , & node - > address ) ! = 0 ) {
return - 1 ;
}
node - > ctdb = ctdb ;
2006-11-28 06:15:46 +03:00
node - > name = talloc_asprintf ( node , " %s:%u " ,
node - > address . address ,
node - > address . port ) ;
2006-11-28 09:56:10 +03:00
/* for now we just set the vnn to the line in the file - this
will change ! */
node - > vnn = ctdb - > num_nodes ;
2006-11-27 13:38:13 +03:00
2006-11-28 09:56:10 +03:00
if ( ctdb_same_address ( & ctdb - > address , & node - > address ) ) {
ctdb - > vnn = node - > vnn ;
2007-04-28 19:42:40 +04:00
node - > flags | = NODE_FLAGS_CONNECTED ;
2006-11-28 09:56:10 +03:00
}
ctdb - > num_nodes + + ;
2006-11-27 13:38:13 +03:00
return 0 ;
}
/*
  setup the node list from a file

  The file named by `nlist` is loaded with file_lines_load() and each
  line is passed to ctdb_add_node() in order.  Processing stops at the
  first line that fails.  Returns 0 when every line was added, -1 if the
  file could not be loaded or a line was rejected.
*/
int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
{
	int count;
	int idx = 0;
	int status = 0;
	char **entries = file_lines_load(nlist, &count, ctdb);

	if (entries == NULL) {
		ctdb_set_error(ctdb, " Failed to load nlist '%s' \n ", nlist);
		return -1;
	}

	while (idx < count) {
		if (ctdb_add_node(ctdb, entries[idx]) != 0) {
			status = -1;
			break;
		}
		idx++;
	}

	/* the loaded lines are released on both the success and error path */
	talloc_free(entries);
	return status;
}
/*
setup the local node address
*/
int ctdb_set_address ( struct ctdb_context * ctdb , const char * address )
{
2006-11-28 09:56:10 +03:00
if ( ctdb_parse_address ( ctdb , ctdb , address , & ctdb - > address ) ! = 0 ) {
return - 1 ;
}
ctdb - > name = talloc_asprintf ( ctdb , " %s:%u " ,
ctdb - > address . address ,
ctdb - > address . port ) ;
return 0 ;
2006-11-27 13:38:13 +03:00
}
2007-04-26 16:27:49 +04:00
/*
setup the local socket name
*/
int ctdb_set_socketname ( struct ctdb_context * ctdb , const char * socketname )
{
ctdb - > daemon . name = talloc_strdup ( ctdb , socketname ) ;
return 0 ;
}
2006-11-27 13:38:13 +03:00
/*
add a node to the list of active nodes
*/
2007-04-03 13:41:00 +04:00
int ctdb_set_call ( struct ctdb_db_context * ctdb_db , ctdb_fn_t fn , int id )
2006-11-27 13:38:13 +03:00
{
struct ctdb_registered_call * call ;
2007-04-03 13:41:00 +04:00
call = talloc ( ctdb_db , struct ctdb_registered_call ) ;
2006-11-27 13:38:13 +03:00
call - > fn = fn ;
call - > id = id ;
2007-04-03 13:41:00 +04:00
DLIST_ADD ( ctdb_db - > calls , call ) ;
2006-11-27 13:38:13 +03:00
return 0 ;
}
2007-01-23 03:38:45 +03:00
/*
return the vnn of this node
*/
uint32_t ctdb_get_vnn ( struct ctdb_context * ctdb )
{
return ctdb - > vnn ;
}
2007-02-09 04:45:58 +03:00
/*
return the number of nodes
*/
uint32_t ctdb_get_num_nodes ( struct ctdb_context * ctdb )
{
return ctdb - > num_nodes ;
}
2006-11-27 13:38:13 +03:00
/*
2006-11-28 09:56:10 +03:00
called by the transport layer when a packet comes in
2006-11-27 13:38:13 +03:00
*/
2007-04-16 17:52:14 +04:00
void ctdb_recv_pkt ( struct ctdb_context * ctdb , uint8_t * data , uint32_t length )
2006-11-27 13:38:13 +03:00
{
2007-04-18 05:20:24 +04:00
struct ctdb_req_header * hdr = ( struct ctdb_req_header * ) data ;
TALLOC_CTX * tmp_ctx ;
2007-04-20 14:07:47 +04:00
ctdb - > status . node_packets_recv + + ;
2007-04-18 05:20:24 +04:00
/* place the packet as a child of the tmp_ctx. We then use
talloc_free ( ) below to free it . If any of the calls want
to keep it , then they will steal it somewhere else , and the
talloc_free ( ) will only free the tmp_ctx */
tmp_ctx = talloc_new ( ctdb ) ;
talloc_steal ( tmp_ctx , hdr ) ;
2007-02-07 05:26:07 +03:00
2006-12-01 07:45:24 +03:00
if ( length < sizeof ( * hdr ) ) {
ctdb_set_error ( ctdb , " Bad packet length %d \n " , length ) ;
2007-04-18 05:20:24 +04:00
goto done ;
2006-12-01 07:45:24 +03:00
}
if ( length ! = hdr - > length ) {
ctdb_set_error ( ctdb , " Bad header length %d expected %d \n " ,
hdr - > length , length ) ;
2007-04-18 05:20:24 +04:00
goto done ;
2006-12-01 07:45:24 +03:00
}
2007-01-23 03:38:45 +03:00
2007-04-06 08:41:05 +04:00
if ( hdr - > ctdb_magic ! = CTDB_MAGIC ) {
ctdb_set_error ( ctdb , " Non CTDB packet rejected \n " ) ;
2007-04-18 05:20:24 +04:00
goto done ;
2007-04-06 08:41:05 +04:00
}
if ( hdr - > ctdb_version ! = CTDB_VERSION ) {
ctdb_set_error ( ctdb , " Bad CTDB version 0x%x rejected \n " , hdr - > ctdb_version ) ;
2007-04-18 05:20:24 +04:00
goto done ;
2007-04-06 08:41:05 +04:00
}
2007-04-17 19:59:39 +04:00
DEBUG ( 3 , ( __location__ " ctdb request %d of type %d length %d from "
" node %d to %d \n " , hdr - > reqid , hdr - > operation , hdr - > length ,
hdr - > srcnode , hdr - > destnode ) ) ;
2007-04-17 16:13:06 +04:00
2006-12-01 07:45:24 +03:00
switch ( hdr - > operation ) {
case CTDB_REQ_CALL :
2007-04-27 19:48:31 +04:00
/* verify that the remote node that sent us the call
is running in the same generation instance as this node
*/
if ( ctdb - > vnn_map - > generation ! = hdr - > generation ) {
DEBUG ( 0 , ( __location__ " ctdb request %d of type "
" %d length %d from node %d to %d had an "
" invalid generation id:%d while our "
" generation id is:%d \n " ,
hdr - > reqid , hdr - > operation , hdr - > length ,
hdr - > srcnode , hdr - > destnode ,
ctdb - > vnn_map - > generation ,
hdr - > generation ) ) ;
break ;
}
2007-04-20 15:02:53 +04:00
ctdb - > status . count . req_call + + ;
2006-12-01 07:45:24 +03:00
ctdb_request_call ( ctdb , hdr ) ;
break ;
case CTDB_REPLY_CALL :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . reply_call + + ;
2006-12-01 07:45:24 +03:00
ctdb_reply_call ( ctdb , hdr ) ;
break ;
2006-12-18 06:27:20 +03:00
case CTDB_REPLY_ERROR :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . reply_error + + ;
2006-12-18 06:27:20 +03:00
ctdb_reply_error ( ctdb , hdr ) ;
break ;
2006-12-18 08:01:11 +03:00
case CTDB_REQ_DMASTER :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . req_dmaster + + ;
2006-12-18 08:01:11 +03:00
ctdb_request_dmaster ( ctdb , hdr ) ;
break ;
case CTDB_REPLY_DMASTER :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . reply_dmaster + + ;
2006-12-18 08:01:11 +03:00
ctdb_reply_dmaster ( ctdb , hdr ) ;
break ;
2007-02-09 01:42:04 +03:00
case CTDB_REQ_MESSAGE :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . req_message + + ;
2007-02-09 01:42:04 +03:00
ctdb_request_message ( ctdb , hdr ) ;
break ;
2007-04-18 05:55:54 +04:00
case CTDB_REQ_FINISHED :
2007-04-20 15:02:53 +04:00
ctdb - > status . count . req_finished + + ;
2007-04-18 05:55:54 +04:00
ctdb_request_finished ( ctdb , hdr ) ;
break ;
2007-04-26 16:27:49 +04:00
case CTDB_REQ_CONTROL :
ctdb - > status . count . req_control + + ;
ctdb_request_control ( ctdb , hdr ) ;
break ;
case CTDB_REPLY_CONTROL :
ctdb - > status . count . reply_control + + ;
ctdb_reply_control ( ctdb , hdr ) ;
break ;
2006-12-01 07:45:24 +03:00
default :
2007-04-17 16:13:06 +04:00
DEBUG ( 0 , ( " %s: Packet with unknown operation %d \n " ,
__location__ , hdr - > operation ) ) ;
2006-12-01 07:45:24 +03:00
break ;
}
2007-04-18 05:20:24 +04:00
done :
talloc_free ( tmp_ctx ) ;
2006-11-27 13:38:13 +03:00
}
2007-04-19 10:27:56 +04:00
/*
  untyped entry point for incoming packets

  `p` is converted to a struct ctdb_context with talloc_get_type() and
  the packet is passed straight on to ctdb_recv_pkt().
*/
void ctdb_recv_raw_pkt(void *p, uint8_t *data, uint32_t length)
{
	ctdb_recv_pkt(talloc_get_type(p, struct ctdb_context), data, length);
}
2006-11-28 03:51:33 +03:00
/*
2006-11-28 09:56:10 +03:00
called by the transport layer when a node is dead
2006-11-28 03:51:33 +03:00
*/
2006-11-28 09:56:10 +03:00
static void ctdb_node_dead ( struct ctdb_node * node )
2006-11-28 03:51:33 +03:00
{
2006-12-01 00:58:08 +03:00
node - > ctdb - > num_connected - - ;
2007-04-28 14:40:26 +04:00
node - > flags & = ~ NODE_FLAGS_CONNECTED ;
2007-04-17 16:13:06 +04:00
DEBUG ( 1 , ( " %s: node %s is dead: %d connected \n " ,
node - > ctdb - > name , node - > name , node - > ctdb - > num_connected ) ) ;
2006-11-28 03:51:33 +03:00
}
2006-11-28 06:15:46 +03:00
/*
2007-02-07 05:26:07 +03:00
called by the transport layer when a node is connected
2006-11-28 06:15:46 +03:00
*/
2006-11-28 09:56:10 +03:00
static void ctdb_node_connected ( struct ctdb_node * node )
2006-11-28 06:15:46 +03:00
{
2006-12-01 00:58:08 +03:00
node - > ctdb - > num_connected + + ;
2007-04-28 14:40:26 +04:00
node - > flags | = NODE_FLAGS_CONNECTED ;
2007-04-17 16:13:06 +04:00
DEBUG ( 1 , ( " %s: connected to %s - %d connected \n " ,
node - > ctdb - > name , node - > name , node - > ctdb - > num_connected ) ) ;
2006-12-01 00:58:08 +03:00
}
/*
wait for all nodes to be connected
*/
2007-04-11 08:54:47 +04:00
void ctdb_daemon_connect_wait ( struct ctdb_context * ctdb )
2006-12-01 00:58:08 +03:00
{
2006-12-01 07:45:24 +03:00
int expected = ctdb - > num_nodes - 1 ;
if ( ctdb - > flags & CTDB_FLAG_SELF_CONNECT ) {
expected + + ;
}
while ( ctdb - > num_connected ! = expected ) {
2007-04-17 16:39:23 +04:00
DEBUG ( 3 , ( " ctdb_connect_wait: waiting for %d nodes (have %d) \n " ,
expected , ctdb - > num_connected ) ) ;
2006-12-01 07:45:24 +03:00
event_loop_once ( ctdb - > ev ) ;
}
2007-04-17 16:39:23 +04:00
DEBUG ( 3 , ( " ctdb_connect_wait: got all %d nodes \n " , expected ) ) ;
2006-12-01 07:45:24 +03:00
}
2007-04-20 11:58:37 +04:00
/*
  state for one deferred packet: filled in by ctdb_defer_packet() and
  consumed by queue_next_trigger()
*/
struct queue_next {
struct ctdb_context * ctdb ; /* context the packet is delivered to */
struct ctdb_req_header * hdr ; /* talloc_memdup() copy of the packet, hdr->length bytes long */
} ;
/*
trigered when a deferred packet is due
*/
static void queue_next_trigger ( struct event_context * ev , struct timed_event * te ,
struct timeval t , void * private_data )
{
struct queue_next * q = talloc_get_type ( private_data , struct queue_next ) ;
ctdb_recv_pkt ( q - > ctdb , ( uint8_t * ) q - > hdr , q - > hdr - > length ) ;
talloc_free ( q ) ;
}
/*
defer a packet , so it is processed on the next event loop
this is used for sending packets to ourselves
*/
static void ctdb_defer_packet ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct queue_next * q ;
q = talloc ( ctdb , struct queue_next ) ;
if ( q = = NULL ) {
DEBUG ( 0 , ( __location__ " Failed to allocate deferred packet \n " ) ) ;
return ;
}
q - > ctdb = ctdb ;
q - > hdr = talloc_memdup ( ctdb , hdr , hdr - > length ) ;
if ( q - > hdr = = NULL ) {
DEBUG ( 0 , ( " Error copying deferred packet to self \n " ) ) ;
return ;
}
2007-04-22 16:26:45 +04:00
#if 0
/* use this to put packets directly into our recv function */
ctdb_recv_pkt ( q - > ctdb , ( uint8_t * ) q - > hdr , q - > hdr - > length ) ;
talloc_free ( q ) ;
# else
2007-04-20 11:58:37 +04:00
event_add_timed ( ctdb - > ev , q , timeval_zero ( ) , queue_next_trigger , q ) ;
2007-04-22 16:26:45 +04:00
# endif
2007-04-20 11:58:37 +04:00
}
2007-02-09 01:42:04 +03:00
/*
queue a packet or die
*/
void ctdb_queue_packet ( struct ctdb_context * ctdb , struct ctdb_req_header * hdr )
{
struct ctdb_node * node ;
2007-04-20 14:07:47 +04:00
ctdb - > status . node_packets_sent + + ;
2007-04-26 17:38:33 +04:00
if ( ! ctdb_validate_vnn ( ctdb , hdr - > destnode ) ) {
DEBUG ( 0 , ( __location__ " cant send to node %u that does not exist \n " ,
hdr - > destnode ) ) ;
return ;
}
2007-02-09 01:42:04 +03:00
node = ctdb - > nodes [ hdr - > destnode ] ;
2007-04-26 17:38:33 +04:00
2007-04-20 11:58:37 +04:00
if ( hdr - > destnode = = ctdb - > vnn & & ! ( ctdb - > flags & CTDB_FLAG_SELF_CONNECT ) ) {
ctdb_defer_packet ( ctdb , hdr ) ;
} else if ( ctdb - > methods - > queue_pkt ( node , ( uint8_t * ) hdr , hdr - > length ) ! = 0 ) {
2007-02-09 01:42:04 +03:00
ctdb_fatal ( ctdb , " Unable to queue packet \n " ) ;
}
}
2006-11-28 03:51:33 +03:00
static const struct ctdb_upcalls ctdb_upcalls = {
2006-11-28 09:56:10 +03:00
. recv_pkt = ctdb_recv_pkt ,
. node_dead = ctdb_node_dead ,
. node_connected = ctdb_node_connected
2006-11-28 03:51:33 +03:00
} ;
/*
initialise the ctdb daemon .
NOTE : In current code the daemon does not fork . This is for testing purposes only
and to simplify the code .
*/
struct ctdb_context * ctdb_init ( struct event_context * ev )
{
struct ctdb_context * ctdb ;
ctdb = talloc_zero ( ev , struct ctdb_context ) ;
ctdb - > ev = ev ;
ctdb - > upcalls = & ctdb_upcalls ;
2006-11-28 12:48:34 +03:00
ctdb - > idr = idr_init ( ctdb ) ;
2007-01-23 03:38:45 +03:00
ctdb - > max_lacount = CTDB_DEFAULT_MAX_LACOUNT ;
2006-11-28 03:51:33 +03:00
return ctdb ;
}