2006-11-18 02:41:20 +03:00
/*
ctdb over TCP
Copyright ( C ) Andrew Tridgell 2006
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 - 1307 USA
*/
# include "includes.h"
# include "lib/events/events.h"
2007-01-23 03:38:45 +03:00
# include "lib/tdb/include/tdb.h"
2006-11-18 02:41:20 +03:00
# include "system/network.h"
# include "system/filesys.h"
2007-01-23 03:38:45 +03:00
# include "../include/ctdb_private.h"
2006-11-27 13:38:13 +03:00
# include "ctdb_tcp.h"
2006-11-18 02:41:20 +03:00
2006-11-28 06:15:46 +03:00
/*
  switch a file descriptor into non-blocking mode

  This is best effort: nothing is reported to the caller. If the current
  status flags cannot be read the descriptor is left untouched, so the -1
  error return of F_GETFL is never fed back in as a flag mask.
*/
static void set_nonblocking(int fd)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags == -1) {
		return;
	}
	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
2007-04-10 13:33:21 +04:00
/*
called when a complete packet has come in - should not happen on this socket
*/
2007-04-13 14:38:24 +04:00
void ctdb_tcp_tnode_cb ( uint8_t * data , size_t cnt , void * private_data )
2007-04-10 13:33:21 +04:00
{
2007-04-13 14:38:24 +04:00
struct ctdb_node * node = talloc_get_type ( private_data , struct ctdb_node ) ;
2007-04-11 22:12:15 +04:00
struct ctdb_tcp_node * tnode = talloc_get_type (
node - > private_data , struct ctdb_tcp_node ) ;
2007-04-10 13:33:21 +04:00
2007-04-17 13:41:29 +04:00
if ( data = = NULL ) {
node - > ctdb - > upcalls - > node_dead ( node ) ;
}
2007-04-10 13:33:21 +04:00
/* start a new connect cycle to try to re-establish the
link */
ctdb_queue_set_fd ( tnode - > queue , - 1 ) ;
2007-05-25 16:07:45 +04:00
tnode - > fd = - 1 ;
2007-04-10 13:33:21 +04:00
event_add_timed ( node - > ctdb - > ev , node , timeval_zero ( ) ,
ctdb_tcp_node_connect , node ) ;
}
2006-11-18 02:41:20 +03:00
/*
called when socket becomes writeable on connect
*/
static void ctdb_node_connect_write ( struct event_context * ev , struct fd_event * fde ,
2007-04-11 22:12:15 +04:00
uint16_t flags , void * private_data )
2006-11-18 02:41:20 +03:00
{
2007-04-11 22:12:15 +04:00
struct ctdb_node * node = talloc_get_type ( private_data ,
struct ctdb_node ) ;
struct ctdb_tcp_node * tnode = talloc_get_type ( node - > private_data ,
2006-11-27 13:38:13 +03:00
struct ctdb_tcp_node ) ;
2006-11-18 02:41:20 +03:00
struct ctdb_context * ctdb = node - > ctdb ;
2006-11-28 09:56:10 +03:00
int error = 0 ;
2006-12-01 07:45:24 +03:00
socklen_t len = sizeof ( error ) ;
2007-02-20 06:57:13 +03:00
int one = 1 ;
2006-11-18 02:41:20 +03:00
2007-05-25 16:07:45 +04:00
talloc_free ( tnode - > connect_te ) ;
tnode - > connect_te = NULL ;
2006-11-27 13:38:13 +03:00
if ( getsockopt ( tnode - > fd , SOL_SOCKET , SO_ERROR , & error , & len ) ! = 0 | |
2006-11-18 07:27:36 +03:00
error ! = 0 ) {
talloc_free ( fde ) ;
2006-11-27 13:38:13 +03:00
close ( tnode - > fd ) ;
tnode - > fd = - 1 ;
2006-11-18 02:41:20 +03:00
event_add_timed ( ctdb - > ev , node , timeval_current_ofs ( 1 , 0 ) ,
2006-11-28 03:51:33 +03:00
ctdb_tcp_node_connect , node ) ;
2006-11-18 02:41:20 +03:00
return ;
}
talloc_free ( fde ) ;
2007-04-10 13:33:21 +04:00
setsockopt ( tnode - > fd , IPPROTO_TCP , TCP_NODELAY , ( char * ) & one , sizeof ( one ) ) ;
2007-05-15 12:40:56 +04:00
setsockopt ( tnode - > fd , SOL_SOCKET , SO_KEEPALIVE , ( char * ) & one , sizeof ( one ) ) ;
2007-04-10 13:33:21 +04:00
2007-04-10 14:48:31 +04:00
ctdb_queue_set_fd ( tnode - > queue , tnode - > fd ) ;
2006-11-28 09:56:10 +03:00
/* tell the ctdb layer we are connected */
node - > ctdb - > upcalls - > node_connected ( node ) ;
2006-11-18 02:41:20 +03:00
}
2007-02-20 05:22:18 +03:00
static int ctdb_tcp_get_address ( struct ctdb_context * ctdb ,
const char * address , struct in_addr * addr )
{
if ( inet_pton ( AF_INET , address , addr ) < = 0 ) {
struct hostent * he = gethostbyname ( address ) ;
if ( he = = NULL | | he - > h_length > sizeof ( * addr ) ) {
ctdb_set_error ( ctdb , " invalid nework address '%s' \n " ,
address ) ;
return - 1 ;
}
memcpy ( addr , he - > h_addr , he - > h_length ) ;
}
return 0 ;
}
2006-11-18 02:41:20 +03:00
/*
called when we should try and establish a tcp connection to a node
*/
2006-11-28 03:51:33 +03:00
void ctdb_tcp_node_connect ( struct event_context * ev , struct timed_event * te ,
2007-04-11 22:12:15 +04:00
struct timeval t , void * private_data )
2006-11-18 02:41:20 +03:00
{
2007-04-11 22:12:15 +04:00
struct ctdb_node * node = talloc_get_type ( private_data ,
struct ctdb_node ) ;
struct ctdb_tcp_node * tnode = talloc_get_type ( node - > private_data ,
2006-11-27 13:38:13 +03:00
struct ctdb_tcp_node ) ;
2006-11-18 02:41:20 +03:00
struct ctdb_context * ctdb = node - > ctdb ;
2007-04-06 03:08:41 +04:00
struct sockaddr_in sock_in ;
2006-11-18 02:41:20 +03:00
struct sockaddr_in sock_out ;
2007-05-25 16:07:45 +04:00
if ( tnode - > fd ! = - 1 ) {
talloc_free ( tnode - > connect_fde ) ;
tnode - > connect_fde = NULL ;
close ( tnode - > fd ) ;
tnode - > fd = - 1 ;
}
2006-11-27 13:38:13 +03:00
tnode - > fd = socket ( PF_INET , SOCK_STREAM , IPPROTO_TCP ) ;
2006-11-18 02:41:20 +03:00
2006-11-28 06:15:46 +03:00
set_nonblocking ( tnode - > fd ) ;
2006-11-18 02:41:20 +03:00
2007-05-15 03:42:52 +04:00
ZERO_STRUCT ( sock_out ) ;
# ifdef HAVE_SOCK_SIN_LEN
sock_out . sin_len = sizeof ( sock_out ) ;
# endif
2007-02-20 05:22:18 +03:00
if ( ctdb_tcp_get_address ( ctdb , node - > address . address , & sock_out . sin_addr ) ! = 0 ) {
return ;
}
2006-11-18 03:21:40 +03:00
sock_out . sin_port = htons ( node - > address . port ) ;
2006-11-18 02:41:20 +03:00
sock_out . sin_family = PF_INET ;
2007-04-06 03:08:41 +04:00
/* Bind our side of the socketpair to the same address we use to listen
* on incoming CTDB traffic .
* We must specify this address to make sure that the address we expose to
* the remote side is actually routable in case CTDB traffic will run on
* a dedicated non - routeable network .
*/
2007-05-15 03:42:52 +04:00
ZERO_STRUCT ( sock_in ) ;
# ifdef HAVE_SOCK_SIN_LEN
sock_in . sin_len = sizeof ( sock_in ) ;
# endif
2007-04-06 03:08:41 +04:00
if ( ctdb_tcp_get_address ( ctdb , ctdb - > address . address , & sock_in . sin_addr ) ! = 0 ) {
return ;
}
sock_in . sin_port = htons ( 0 ) ; /* INPORT_ANY is not always available */
sock_in . sin_family = PF_INET ;
bind ( tnode - > fd , ( struct sockaddr * ) & sock_in , sizeof ( sock_in ) ) ;
2007-01-23 03:38:45 +03:00
if ( connect ( tnode - > fd , ( struct sockaddr * ) & sock_out , sizeof ( sock_out ) ) ! = 0 & &
2006-11-18 02:41:20 +03:00
errno ! = EINPROGRESS ) {
/* try again once a second */
2006-11-27 13:38:13 +03:00
close ( tnode - > fd ) ;
2007-05-15 08:08:58 +04:00
tnode - > fd = - 1 ;
2006-11-18 02:41:20 +03:00
event_add_timed ( ctdb - > ev , node , timeval_current_ofs ( 1 , 0 ) ,
2006-11-28 03:51:33 +03:00
ctdb_tcp_node_connect , node ) ;
2006-11-18 02:41:20 +03:00
return ;
}
/* non-blocking connect - wait for write event */
2007-05-25 16:07:45 +04:00
tnode - > connect_fde = event_add_fd ( node - > ctdb - > ev , node , tnode - > fd ,
EVENT_FD_WRITE | EVENT_FD_READ ,
ctdb_node_connect_write , node ) ;
/* don't give it long to connect - retry in one second. This ensures
that we find a node is up quickly ( tcp normally backs off a syn reply
delay by quite a lot ) */
tnode - > connect_te = event_add_timed ( ctdb - > ev , node , timeval_current_ofs ( 1 , 0 ) ,
ctdb_tcp_node_connect , node ) ;
2006-11-18 02:41:20 +03:00
}
2006-11-18 05:45:04 +03:00
/*
called when we get contacted by another node
currently makes no attempt to check if the connection is really from a ctdb
node in our cluster
*/
static void ctdb_listen_event ( struct event_context * ev , struct fd_event * fde ,
2007-04-13 14:38:24 +04:00
uint16_t flags , void * private_data )
2006-11-18 05:45:04 +03:00
{
struct ctdb_context * ctdb ;
2006-11-27 13:38:13 +03:00
struct ctdb_tcp * ctcp ;
2006-11-18 07:27:36 +03:00
struct sockaddr_in addr ;
2006-11-18 05:45:04 +03:00
socklen_t len ;
int fd ;
struct ctdb_incoming * in ;
2007-05-15 12:40:56 +04:00
int one = 1 ;
2006-11-18 05:45:04 +03:00
2007-04-13 14:38:24 +04:00
ctdb = talloc_get_type ( private_data , struct ctdb_context ) ;
2007-04-11 22:12:15 +04:00
ctcp = talloc_get_type ( ctdb - > private_data , struct ctdb_tcp ) ;
2006-11-18 07:27:36 +03:00
memset ( & addr , 0 , sizeof ( addr ) ) ;
len = sizeof ( addr ) ;
2006-11-27 13:38:13 +03:00
fd = accept ( ctcp - > listen_fd , ( struct sockaddr * ) & addr , & len ) ;
2006-11-18 05:45:04 +03:00
if ( fd = = - 1 ) return ;
2006-12-19 04:03:10 +03:00
in = talloc_zero ( ctdb , struct ctdb_incoming ) ;
2006-11-18 05:45:04 +03:00
in - > fd = fd ;
in - > ctdb = ctdb ;
2006-11-28 06:15:46 +03:00
set_nonblocking ( in - > fd ) ;
2007-05-15 12:40:56 +04:00
setsockopt ( in - > fd , SOL_SOCKET , SO_KEEPALIVE , ( char * ) & one , sizeof ( one ) ) ;
2007-04-10 13:33:21 +04:00
in - > queue = ctdb_queue_setup ( ctdb , in , in - > fd , CTDB_TCP_ALIGNMENT ,
ctdb_tcp_read_cb , in ) ;
2006-11-18 05:45:04 +03:00
}
/*
2007-05-01 00:34:55 +04:00
automatically find which address to listen on
2006-11-18 05:45:04 +03:00
*/
2007-05-01 00:34:55 +04:00
static int ctdb_tcp_listen_automatic ( struct ctdb_context * ctdb )
2006-11-18 05:45:04 +03:00
{
2007-04-11 22:12:15 +04:00
struct ctdb_tcp * ctcp = talloc_get_type ( ctdb - > private_data ,
struct ctdb_tcp ) ;
2006-11-18 05:45:04 +03:00
struct sockaddr_in sock ;
2007-05-01 00:34:55 +04:00
int lock_fd , i ;
const char * lock_path = " /tmp/.ctdb_socket_lock " ;
struct flock lock ;
/* in order to ensure that we don't get two nodes with the
same adddress , we must make the bind ( ) and listen ( ) calls
atomic . The SO_REUSEADDR setsockopt only prevents double
binds if the first socket is in LISTEN state */
lock_fd = open ( lock_path , O_RDWR | O_CREAT , 0666 ) ;
if ( lock_fd = = - 1 ) {
DEBUG ( 0 , ( " Unable to open %s \n " , lock_path ) ) ;
return - 1 ;
}
lock . l_type = F_WRLCK ;
lock . l_whence = SEEK_SET ;
lock . l_start = 0 ;
lock . l_len = 1 ;
lock . l_pid = 0 ;
2006-11-18 05:45:04 +03:00
2007-05-01 00:34:55 +04:00
if ( fcntl ( lock_fd , F_SETLKW , & lock ) ! = 0 ) {
DEBUG ( 0 , ( " Unable to lock %s \n " , lock_path ) ) ;
close ( lock_fd ) ;
2007-02-20 05:22:18 +03:00
return - 1 ;
}
2006-11-18 05:45:04 +03:00
2007-05-01 00:34:55 +04:00
for ( i = 0 ; i < ctdb - > num_nodes ; i + + ) {
2007-05-15 03:42:52 +04:00
ZERO_STRUCT ( sock ) ;
# ifdef HAVE_SOCK_SIN_LEN
sock . sin_len = sizeof ( sock ) ;
# endif
2007-05-01 00:34:55 +04:00
sock . sin_port = htons ( ctdb - > nodes [ i ] - > address . port ) ;
sock . sin_family = PF_INET ;
if ( ctdb_tcp_get_address ( ctdb , ctdb - > nodes [ i ] - > address . address ,
& sock . sin_addr ) ! = 0 ) {
continue ;
}
if ( bind ( ctcp - > listen_fd , ( struct sockaddr * ) & sock ,
sizeof ( sock ) ) = = 0 ) {
break ;
}
}
if ( i = = ctdb - > num_nodes ) {
DEBUG ( 0 , ( " Unable to bind to any of the node addresses - giving up \n " ) ) ;
goto failed ;
}
ctdb - > address = ctdb - > nodes [ i ] - > address ;
ctdb - > name = talloc_asprintf ( ctdb , " %s:%u " ,
ctdb - > address . address ,
ctdb - > address . port ) ;
ctdb - > vnn = ctdb - > nodes [ i ] - > vnn ;
ctdb - > nodes [ i ] - > flags | = NODE_FLAGS_CONNECTED ;
2007-05-01 07:25:02 +04:00
DEBUG ( 1 , ( " ctdb chose network address %s:%u vnn %u \n " ,
2007-05-01 00:34:55 +04:00
ctdb - > address . address ,
ctdb - > address . port ,
ctdb - > vnn ) ) ;
if ( listen ( ctcp - > listen_fd , 10 ) = = - 1 ) {
goto failed ;
}
event_add_fd ( ctdb - > ev , ctdb , ctcp - > listen_fd , EVENT_FD_READ ,
ctdb_listen_event , ctdb ) ;
close ( lock_fd ) ;
return 0 ;
failed :
close ( lock_fd ) ;
close ( ctcp - > listen_fd ) ;
ctcp - > listen_fd = - 1 ;
return - 1 ;
}
/*
listen on our own address
*/
int ctdb_tcp_listen ( struct ctdb_context * ctdb )
{
struct ctdb_tcp * ctcp = talloc_get_type ( ctdb - > private_data ,
struct ctdb_tcp ) ;
struct sockaddr_in sock ;
int one = 1 ;
ctcp - > listen_fd = socket ( PF_INET , SOCK_STREAM , IPPROTO_TCP ) ;
if ( ctcp - > listen_fd = = - 1 ) {
2006-11-18 05:45:04 +03:00
ctdb_set_error ( ctdb , " socket failed \n " ) ;
return - 1 ;
2007-05-01 00:34:55 +04:00
}
2006-11-18 05:45:04 +03:00
2006-11-27 13:38:13 +03:00
setsockopt ( ctcp - > listen_fd , SOL_SOCKET , SO_REUSEADDR , ( char * ) & one , sizeof ( one ) ) ;
2006-11-18 05:45:04 +03:00
2007-05-01 00:34:55 +04:00
/* we can either auto-bind to the first available address, or we can
use a specified address */
if ( ! ctdb - > address . address ) {
return ctdb_tcp_listen_automatic ( ctdb ) ;
}
2007-05-15 03:42:52 +04:00
ZERO_STRUCT ( sock ) ;
# ifdef HAVE_SOCK_SIN_LEN
sock . sin_len = sizeof ( sock ) ;
# endif
2007-05-01 00:34:55 +04:00
sock . sin_port = htons ( ctdb - > address . port ) ;
sock . sin_family = PF_INET ;
if ( ctdb_tcp_get_address ( ctdb , ctdb - > address . address ,
& sock . sin_addr ) ! = 0 ) {
goto failed ;
}
if ( bind ( ctcp - > listen_fd , ( struct sockaddr * ) & sock , sizeof ( sock ) ) ! = 0 ) {
goto failed ;
}
2006-11-18 05:45:04 +03:00
2006-11-27 13:38:13 +03:00
if ( listen ( ctcp - > listen_fd , 10 ) = = - 1 ) {
2007-05-01 00:34:55 +04:00
goto failed ;
2006-11-18 05:45:04 +03:00
}
2006-11-27 13:38:13 +03:00
event_add_fd ( ctdb - > ev , ctdb , ctcp - > listen_fd , EVENT_FD_READ ,
2006-11-18 05:45:04 +03:00
ctdb_listen_event , ctdb ) ;
return 0 ;
2007-05-01 00:34:55 +04:00
failed :
close ( ctcp - > listen_fd ) ;
ctcp - > listen_fd = - 1 ;
return - 1 ;
2006-11-18 05:45:04 +03:00
}