/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc. 2002-2003 All rights reserved.
**  Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
*******************************************************************************
******************************************************************************/

/* This provides the interface between clvmd and gulm as the cluster
 * and lock manager.
 *
 * It also provides the "liblm" functions too, as it's hard (and pointless)
 * to separate them out when using gulm.
 *
 * What it does /not/ provide is the communications between clvmd daemons
 * on the cluster nodes. That is done in tcp-comms.c
 */
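
/* Rough picture of where this file sits (informal, derived from the
 * comment above):
 *
 *   LVM2 commands -> clvmd main loop
 *                      |- clvmd-gulm.c : gulm core login (membership,
 *                      |                 quorum) and the "CLVM" lockspace
 *                      `- tcp-comms.c  : clvmd <-> clvmd messages between
 *                                        cluster nodes
 */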

#include <pthread.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <stddef.h>
#include <unistd.h>
#include <errno.h>
#include <utmpx.h>
#include <syslog.h>
#include <assert.h>

#include <libdevmapper.h>
#include <ccs.h>
#include <libgulm.h>

#include "list.h"
#include "locking.h"
#include "log.h"
#include "clvm.h"
#include "clvmd-comms.h"
#include "lvm-functions.h"
#include "clvmd.h"
#include "clvmd-gulm.h"

/* Hash list of nodes in the cluster */
static struct dm_hash_table *node_hash;

/* Hash list of outstanding lock requests */
static struct dm_hash_table *lock_hash;

/* Copy of the current quorate state */
static uint8_t gulm_quorate = 0;
static enum { INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE } init_state = INIT_NOTDONE;

/* Number of active nodes */
static int num_nodes;

static char *cluster_name;
static int in_shutdown = 0;

static pthread_mutex_t lock_start_mutex;
static volatile int lock_start_flag;

struct node_info
{
	enum { NODE_UNKNOWN, NODE_DOWN, NODE_UP, NODE_CLVMD } state;
	char name[GULM_MAX_CLUSTER_MEMBER_NAME_LEN];
};

struct lock_wait
{
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	int status;
};
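
/*
 * A sketch (not normative) of how struct lock_wait bridges the requesting
 * thread and the gulm callback thread; see _lock_resource() and
 * lock_lock_state() below:
 *
 *   requesting thread                          gulm callback thread
 *   -----------------                          --------------------
 *   dm_hash_insert(lock_hash, res, &lwait);
 *   lg_lock_state_req(...);
 *   pthread_cond_wait(&lwait.cond, ...);  .--> lwait = dm_hash_lookup(lock_hash, key);
 *                                         |    lwait->status = error;
 *   (woken, reads lwait.status)  <--------'    pthread_cond_signal(&lwait->cond);
 *   dm_hash_remove(lock_hash, res);
 *
 * The structure lives on the requester's stack; lock_hash only stores a
 * pointer to it for the lifetime of the request.
 */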

/* Forward */
static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client);
static int get_all_cluster_nodes(void);
static int _csid_from_name(char *csid, const char *name);
static void _cluster_closedown(void);

/* In tcp-comms.c */
extern struct dm_hash_table *sock_hash;

static int add_internal_client(int fd, fd_callback_t callback)
{
	struct local_client *client;

	DEBUGLOG("Add_internal_client, fd = %d\n", fd);

	/* Add a GULM file descriptor to the main loop */
	client = malloc(sizeof(struct local_client));
	if (!client)
	{
		DEBUGLOG("malloc failed\n");
		return -1;
	}

	memset(client, 0, sizeof(struct local_client));
	client->fd = fd;
	client->type = CLUSTER_INTERNAL;
	client->callback = callback;
	add_client(client);

	/* Set close-on-exec */
	fcntl(fd, F_SETFD, 1);

	return 0;
}

/* Gulm library handle */
static gulm_interface_p gulm_if;
static lg_core_callbacks_t core_callbacks;
static lg_lockspace_callbacks_t lock_callbacks;

static void badsig_handler(int sig)
{
	DEBUGLOG("got sig %d\n", sig);
	_cluster_closedown();
	exit(0);
}

static void _reread_config(void)
{
	/* Re-read CCS node list */
	DEBUGLOG("Re-reading CCS config\n");
	get_all_cluster_nodes();
}

static int _init_cluster(void)
{
	int status;
	int ccs_h;
	int port = 0;
	char *portstr;

	/* Get cluster name from CCS */
	ccs_h = ccs_force_connect(NULL, 0);
	if (ccs_h < 0)
	{
		syslog(LOG_ERR, "Cannot log in to CCSD server\n");
		return -1;
	}

	ccs_get(ccs_h, "//cluster/@name", &cluster_name);
	DEBUGLOG("got cluster name %s\n", cluster_name);

	if (!ccs_get(ccs_h, "//cluster/clvm/@port", &portstr))
	{
		port = atoi(portstr);
		free(portstr);
		DEBUGLOG("got port number %d\n", port);

		/* Reject out-of-range port numbers */
		if (port <= 0 || port >= 65536)
			port = 0;
	}

	ccs_disconnect(ccs_h);

	/* Block locking until we are logged in */
	pthread_mutex_init(&lock_start_mutex, NULL);
	pthread_mutex_lock(&lock_start_mutex);
	lock_start_flag = 1;

	node_hash = dm_hash_create(100);
	lock_hash = dm_hash_create(10);

	/* Get all nodes from CCS */
	if (get_all_cluster_nodes())
		return -1;

	/* Initialise GULM library */
	status = lg_initialize(&gulm_if, cluster_name, "clvmd");
	if (status)
	{
		DEBUGLOG("lg_initialize failed: %d\n", status);
		return status;
	}

	/* Connect to core - we are not "important" :-) */
	status = lg_core_login(gulm_if, 0);
	if (status)
	{
		DEBUGLOG("lg_core_login failed: %d\n", status);
		return status;
	}

	/* Initialise the inter-node comms */
	status = init_comms(port);
	if (status)
		return status;

	/* Add core FD to the list */
	status = add_internal_client(lg_core_selector(gulm_if), read_from_core_sock);
	if (status)
	{
		DEBUGLOG("can't allocate client space\n");
		return status;
	}

	/* Connect to the lock server */
	if (lg_lock_login(gulm_if, "CLVM"))
	{
		syslog(LOG_ERR, "Cannot log in to LOCK server\n");
		DEBUGLOG("Cannot log in to LOCK server\n");
		exit(88);
	}

	/* Add lockspace FD to the list */
	status = add_internal_client(lg_lock_selector(gulm_if), read_from_lock_sock);
	if (status)
	{
		DEBUGLOG("can't allocate client space\n");
		exit(status);
	}

	/* Request a list of nodes; we can't really do anything until
	   this comes back */
	status = lg_core_nodelist(gulm_if);
	if (status)
	{
		DEBUGLOG("lg_core_nodelist failed: %d\n", status);
		return status;
	}

	/* So I can kill it without taking GULM down too */
	signal(SIGINT, badsig_handler);
	signal(SIGTERM, badsig_handler);

	return 0;
}

static void _cluster_closedown(void)
{
	DEBUGLOG("cluster_closedown\n");
	in_shutdown = 1;
	unlock_all();
	lg_lock_logout(gulm_if);
	lg_core_logout(gulm_if);
	lg_release(gulm_if);
}

/* Expire locks for a named node, or us */
#define GIO_KEY_SIZE 46

static void drop_expired_locks(char *nodename)
{
	struct utsname nodeinfo;
	uint8_t mask[GIO_KEY_SIZE];

	DEBUGLOG("Dropping expired locks for %s\n", nodename ? nodename : "(null)");

	/* An all-ones mask matches every lock key held by the node */
	memset(mask, 0xff, GIO_KEY_SIZE);

	if (!nodename)
	{
		uname(&nodeinfo);
		nodename = nodeinfo.nodename;
	}

	if (lg_lock_drop_exp(gulm_if, nodename, mask, GIO_KEY_SIZE))
	{
		DEBUGLOG("Error calling lg_lock_drop_exp()\n");
	}
}

static int read_from_core_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
	int status;

	*new_client = NULL;
	status = lg_core_handle_messages(gulm_if, &core_callbacks, NULL);
	return status < 0 ? status : 1;
}

static int read_from_lock_sock(struct local_client *client, char *buf, int len, const char *csid,
			       struct local_client **new_client)
{
	int status;

	*new_client = NULL;
	status = lg_lock_handle_messages(gulm_if, &lock_callbacks, NULL);
	return status < 0 ? status : 1;
}

/* CORE callback routines */
static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t rank, uint8_t corestate)
{
	DEBUGLOG("CORE Got a Login reply. gen:%lld err:%d rank:%d corestate:%d\n",
		 gen, error, rank, corestate);

	if (error)
		exit(error);

	/* Get the current core state (for quorum) */
	lg_core_corestate(gulm_if);

	return 0;
}

static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
	if (nodestate == lg_core_Logged_in)
	{
		/* Don't clobber NODE_CLVMD state */
		if (ninfo->state != NODE_CLVMD)
		{
			if (ninfo->state == NODE_UNKNOWN ||
			    ninfo->state == NODE_DOWN)
				num_nodes++;

			ninfo->state = NODE_UP;
		}
	}
	else
	{
		if (nodestate == lg_core_Expired ||
		    nodestate == lg_core_Fenced ||
		    nodestate == lg_core_Logged_out)
		{
			if (ninfo->state != NODE_DOWN)
				num_nodes--;

			ninfo->state = NODE_DOWN;
		}
	}

	/* Gulm doesn't always send node DOWN events, so even if this is a node
	 * UP we must assume (ahem) that it previously went down at some time.
	 * So we close the sockets here to make sure that we don't have any
	 * dead connections to that node.
	 */
	tcp_remove_client(csid);

	DEBUGLOG("set_node_state, '%s' state = %d num_nodes=%d\n",
		 ninfo->name, ninfo->state, num_nodes);
}

static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
	if (!ninfo)
	{
		/* If we can't find that node then re-read the config file in case it
		   was added after we were started */
		DEBUGLOG("Node %s not found, re-reading config file\n", name);
		get_all_cluster_nodes();

		/* Now try again */
		ninfo = dm_hash_lookup_binary(node_hash, (char *)ip, GULM_MAX_CSID_LEN);
		if (!ninfo)
		{
			DEBUGLOG("Ignoring node %s, not part of the SAN cluster\n", name);
			return NULL;
		}
	}

	set_node_state(ninfo, (char *)ip, state);

	return ninfo;
}

static void _get_our_csid(char *csid)
{
	get_our_gulm_csid(csid);
}

static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *ip, uint8_t state)
{
	DEBUGLOG("CORE nodelist\n");

	if (type == lglcb_start)
	{
		DEBUGLOG("Got Nodelist, start\n");
	}
	else if (type == lglcb_item)
	{
		DEBUGLOG("Got nodelist, item: %s, %#x\n", name, state);
		add_or_set_node(name, ip, state);
	}
	else if (type == lglcb_stop)
	{
		char ourcsid[GULM_MAX_CSID_LEN];

		DEBUGLOG("Got Nodelist, stop\n");
		if (gulm_quorate)
		{
			clvmd_cluster_init_completed();
			init_state = INIT_DONE;
		}
		else
		{
			if (init_state == INIT_NOTDONE)
				init_state = INIT_WAITQUORATE;
		}

		/* Mark ourself as up */
		_get_our_csid(ourcsid);
		gulm_add_up_node(ourcsid);
	}
	else
	{
		DEBUGLOG("Unknown lglcb_t %#x\n", type);
	}

	return 0;
}

static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{
	DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
		 quorate, corestate, mastername);

	gulm_quorate = quorate;
	if (quorate && init_state == INIT_WAITQUORATE)
	{
		clvmd_cluster_init_completed();
		init_state = INIT_DONE;
	}
	return 0;
}

static int core_nodechange(void *misc, char *nodename, struct in6_addr *nodeip, uint8_t nodestate)
{
	struct node_info *ninfo;
	char csid[GULM_MAX_CSID_LEN];

	DEBUGLOG("CORE node change, name=%s, state = %d\n", nodename, nodestate);

	/* If we don't get nodeip here, try a lookup by name
	   (into a local buffer, so there is somewhere to write the csid) */
	if (!nodeip && !_csid_from_name(csid, nodename))
		nodeip = (struct in6_addr *)csid;
	if (!nodeip)
		return 0;

	ninfo = add_or_set_node(nodename, nodeip, nodestate);
	if (!ninfo)
		return 0;

	/* Check if we need to drop any expired locks */
	if (ninfo->state == NODE_DOWN)
	{
		drop_expired_locks(nodename);
	}

	return 0;
}

static int core_error(void *misc, uint32_t err)
{
	DEBUGLOG("CORE error: %d\n", err);
	/* Not sure what happens here */
	return 0;
}

/* LOCK callback routines */
static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
{
	DEBUGLOG("LOCK Got a Login reply. err:%d which:%d\n",
		 error, which);

	if (error)
		exit(error);

	/* Drop any expired locks for us that might be hanging around */
	drop_expired_locks(NULL);

	/* Enable locking operations in other threads */
	if (lock_start_flag)
	{
		lock_start_flag = 0;
		pthread_mutex_unlock(&lock_start_mutex);
	}

	return 0;
}

static int lock_lock_state(void *misc, uint8_t *key, uint16_t keylen,
			   uint64_t subid, uint64_t start, uint64_t stop,
			   uint8_t state, uint32_t flags, uint32_t error,
			   uint8_t *LVB, uint16_t LVBlen)
{
	struct lock_wait *lwait;

	DEBUGLOG("LOCK lock state: %s, error = %d\n", key, error);

	/* No waiting process to wake up when we are shutting down */
	if (in_shutdown)
		return 0;

	lwait = dm_hash_lookup(lock_hash, (char *)key);
	if (!lwait)
	{
		DEBUGLOG("Can't find hash entry for resource %s\n", key);
		return 0;
	}

	lwait->status = error;
	pthread_mutex_lock(&lwait->mutex);
	pthread_cond_signal(&lwait->cond);
	pthread_mutex_unlock(&lwait->mutex);

	return 0;
}

static int lock_error(void *misc, uint32_t err)
{
	DEBUGLOG("LOCK error: %d\n", err);
	/* Not sure what happens here */
	return 0;
}

/* CORE callbacks */
static lg_core_callbacks_t core_callbacks = {
	.login_reply  = core_login_reply,
	.nodelist     = core_nodelist,
	.statechange  = core_statechange,
	.nodechange   = core_nodechange,
	.error        = core_error,
};

/* LOCK callbacks */
static lg_lockspace_callbacks_t lock_callbacks = {
	.login_reply  = lock_login_reply,
	.lock_state   = lock_lock_state,
	.error        = lock_error,
};
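
/*
 * Dispatch note: the clvmd main loop polls the core and lockspace file
 * descriptors registered via add_internal_client(). When data arrives,
 * read_from_core_sock()/read_from_lock_sock() hand it to
 * lg_core_handle_messages()/lg_lock_handle_messages(), which decode the
 * gulm messages and call back into the handlers named in the two tables
 * above.
 */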

/* Allow tcp-comms to loop round the list of active nodes */
int get_next_node_csid(void **context, char *csid)
{
	struct node_info *ninfo = NULL;

	/* First node */
	if (!*context)
	{
		*context = dm_hash_get_first(node_hash);
	}
	else
	{
		*context = dm_hash_get_next(node_hash, *context);
	}
	if (*context)
		ninfo = dm_hash_get_data(node_hash, *context);

	/* Find a node that is UP */
	while (*context && ninfo->state == NODE_DOWN)
	{
		*context = dm_hash_get_next(node_hash, *context);
		if (*context)
		{
			ninfo = dm_hash_get_data(node_hash, *context);
		}
	}

	if (!*context || ninfo->state == NODE_DOWN)
	{
		return 0;
	}

	memcpy(csid, dm_hash_get_key(node_hash, *context), GULM_MAX_CSID_LEN);
	return 1;
}

int gulm_name_from_csid(const char *csid, char *name)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo)
	{
		sprintf(name, "UNKNOWN %s", print_csid(csid));
		return -1;
	}

	strcpy(name, ninfo->name);
	return 0;
}

static int _csid_from_name(char *csid, const char *name)
{
	struct dm_hash_node *hn;
	struct node_info *ninfo;

	dm_hash_iterate(hn, node_hash)
	{
		ninfo = dm_hash_get_data(node_hash, hn);
		if (strcmp(ninfo->name, name) == 0)
		{
			memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);
			return 0;
		}
	}
	return -1;
}

static int _get_num_nodes(void)
{
	DEBUGLOG("num_nodes = %d\n", num_nodes);
	return num_nodes;
}

/* Node is now known to be running a clvmd */
void gulm_add_up_node(const char *csid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo) {
		DEBUGLOG("gulm_add_up_node no node_hash entry for csid %s\n",
			 print_csid(csid));
		return;
	}

	DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);

	if (ninfo->state == NODE_DOWN)
		num_nodes++;
	ninfo->state = NODE_CLVMD;

	return;
}

/* Node is now known to be NOT running a clvmd */
void add_down_node(char *csid)
{
	struct node_info *ninfo;

	ninfo = dm_hash_lookup_binary(node_hash, csid, GULM_MAX_CSID_LEN);
	if (!ninfo)
		return;

	/* Only set it to UP if it was previously known to be
	   running clvmd - gulm may set it DOWN quite soon */
	if (ninfo->state == NODE_CLVMD)
		ninfo->state = NODE_UP;

	drop_expired_locks(ninfo->name);

	return;
}

/* Call a callback for each node, so the caller knows whether it's up or down */
static int _cluster_do_node_callback(struct local_client *master_client,
				     void (*callback)(struct local_client *, const char *csid, int node_up))
{
	struct dm_hash_node *hn;
	struct node_info *ninfo;

	dm_hash_iterate(hn, node_hash)
	{
		char csid[GULM_MAX_CSID_LEN];
		struct local_client *client;

		ninfo = dm_hash_get_data(node_hash, hn);
		memcpy(csid, dm_hash_get_key(node_hash, hn), GULM_MAX_CSID_LEN);

		DEBUGLOG("down_callback. node %s, state = %d\n", ninfo->name, ninfo->state);

		client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
		if (!client)
		{
			/* If it's up but not connected, try to make contact */
			if (ninfo->state == NODE_UP)
				gulm_connect_csid(csid, &client);

			client = dm_hash_lookup_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
		}
		if (ninfo->state != NODE_DOWN)
			callback(master_client, csid, ninfo->state == NODE_CLVMD);
	}
	return 0;
}

/* Convert gulm error codes to unix errno numbers.
   Returns -1 for a failure (errno is then meaningful) and 0 for success */
static int gulm_to_errno(int gulm_ret)
{
	switch (gulm_ret)
	{
	case lg_err_TryFailed:
	case lg_err_AlreadyPend:
		errno = EAGAIN;
		break;

	/* More?? */
	default:
		errno = EINVAL;
	}

	return gulm_ret ? -1 : 0;
}

/* Real locking */
static int _lock_resource(char *resource, int mode, int flags, int *lockid)
{
	int status;
	struct lock_wait lwait;

	/* Wait until the lock module is ready */
	if (lock_start_flag)
	{
		pthread_mutex_lock(&lock_start_mutex);
		pthread_mutex_unlock(&lock_start_mutex);
	}

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	pthread_mutex_lock(&lwait.mutex);

	/* This needs to be converted from the DLM/LVM2 value for GULM */
	if (flags & LKF_NOQUEUE)
		flags = lg_lock_flag_Try;

	dm_hash_insert(lock_hash, resource, &lwait);
	DEBUGLOG("lock_resource '%s', flags=%d, mode=%d\n", resource, flags, mode);

	status = lg_lock_state_req(gulm_if, resource, strlen(resource) + 1,
				   0, 0, 0,
				   mode, flags, NULL, 0);
	if (status)
	{
		DEBUGLOG("lg_lock_state returned %d\n", status);
		/* Don't leave a dangling pointer to our stack in the hash */
		dm_hash_remove(lock_hash, resource);
		pthread_mutex_unlock(&lwait.mutex);
		return status;
	}

	/* Wait for it to complete */
	pthread_cond_wait(&lwait.cond, &lwait.mutex);
	pthread_mutex_unlock(&lwait.mutex);

	dm_hash_remove(lock_hash, resource);

	DEBUGLOG("lock-resource returning %d\n", lwait.status);

	return gulm_to_errno(lwait.status);
}

static int _unlock_resource(char *resource, int lockid)
{
	int status;
	struct lock_wait lwait;

	pthread_cond_init(&lwait.cond, NULL);
	pthread_mutex_init(&lwait.mutex, NULL);
	pthread_mutex_lock(&lwait.mutex);

	dm_hash_insert(lock_hash, resource, &lwait);
	DEBUGLOG("unlock_resource %s\n", resource);

	status = lg_lock_state_req(gulm_if, resource, strlen(resource) + 1,
				   0, 0, 0,
				   lg_lock_state_Unlock, 0, NULL, 0);
	if (status)
	{
		DEBUGLOG("lg_lock_state(unlock) returned %d\n", status);
		/* Don't leave a dangling pointer to our stack in the hash */
		dm_hash_remove(lock_hash, resource);
		pthread_mutex_unlock(&lwait.mutex);
		return status;
	}

	/* When we are shutting down, don't wait for unlocks
	   to be acknowledged, just do it. */
	if (in_shutdown)
		return status;

	/* Wait for it to complete */
	pthread_cond_wait(&lwait.cond, &lwait.mutex);
	pthread_mutex_unlock(&lwait.mutex);

	dm_hash_remove(lock_hash, resource);

	return gulm_to_errno(lwait.status);
}

/* These two locking functions MUST be called in a separate thread from
   the clvmd main loop because they expect to be woken up by it.

   These are abstractions around the real locking functions (above)
   as we need to emulate the DLM's EX/PW/CW interaction with GULM using
   two locks.
   To aid unlocking, we store the lock mode in the lockid (as GULM
   doesn't use this).
*/
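
/*
 * A compatibility sketch of the two-lock scheme implemented below (derived
 * from reading _sync_lock(), not from any GULM documentation):
 *
 *   LVM2 mode    "<resource>-1"   "<resource>-2"
 *   LCK_EXCL     Exclusive        Exclusive (Try - bail out on failure)
 *   LCK_READ     Shared           unlocked
 *   LCK_PREAD    Shared           unlocked
 *   LCK_WRITE    unlocked         Exclusive
 *
 * Net effect: READ is compatible with READ and with one WRITE, WRITE
 * excludes other WRITEs, and EXCL excludes everything - which mirrors the
 * DLM CR/PW/EX semantics that clvmd expects.
 */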

static int _sync_lock(const char *resource, int mode, int flags, int *lockid)
{
	int status;
	char lock1[strlen(resource) + 3];
	char lock2[strlen(resource) + 3];

	snprintf(lock1, sizeof(lock1), "%s-1", resource);
	snprintf(lock2, sizeof(lock2), "%s-2", resource);

	switch (mode)
	{
	case LCK_EXCL:
		status = _lock_resource(lock1, lg_lock_state_Exclusive, flags, lockid);
		if (status)
			goto out;

		/* If we can't get this lock too then bail out */
		status = _lock_resource(lock2, lg_lock_state_Exclusive, LCK_NONBLOCK, lockid);
		if (status == lg_err_TryFailed)
		{
			_unlock_resource(lock1, *lockid);
			status = -1;
			errno = EAGAIN;
		}
		break;

	case LCK_PREAD:
	case LCK_READ:
		status = _lock_resource(lock1, lg_lock_state_Shared, flags, lockid);
		if (status)
			goto out;
		status = _unlock_resource(lock2, *lockid);
		break;

	case LCK_WRITE:
		status = _lock_resource(lock2, lg_lock_state_Exclusive, flags, lockid);
		if (status)
			goto out;
		status = _unlock_resource(lock1, *lockid);
		break;

	default:
		status = -1;
		errno = EINVAL;
		break;
	}
out:
	/* To aid unlocking, the held mode is stored as the lockid */
	*lockid = mode;
	return status;
}

static int _sync_unlock(const char *resource, int lockid)
{
	int status = 0;
	char lock1[strlen(resource) + 3];
	char lock2[strlen(resource) + 3];

	snprintf(lock1, sizeof(lock1), "%s-1", resource);
	snprintf(lock2, sizeof(lock2), "%s-2", resource);

	/* The held lock mode is in the lock id */
	assert(lockid == LCK_EXCL ||
	       lockid == LCK_READ ||
	       lockid == LCK_PREAD ||
	       lockid == LCK_WRITE);

	status = _unlock_resource(lock1, lockid);
	if (!status)
		status = _unlock_resource(lock2, lockid);

	return status;
}

static int _is_quorate(void)
{
	return gulm_quorate;
}

/* Get all the cluster node names & IPs from CCS and
   add them to our node list so we know who to talk to.
   Called when we start up and if we get sent SIGHUP
*/
static int get_all_cluster_nodes(void)
{
	int ctree;
	char *nodename;
	int error;
	int i;

	/* Open the config file */
	ctree = ccs_force_connect(NULL, 1);
	if (ctree < 0)
	{
		log_error("Error connecting to CCS");
		return -1;
	}

	for (i = 1;; i++)
	{
		char nodekey[256];
		char nodeip[GULM_MAX_CSID_LEN];
		int clvmflag = 1;
		char *clvmflagstr;
		char key[256];

		sprintf(nodekey, "//cluster/clusternodes/clusternode[%d]/@name", i);
		error = ccs_get(ctree, nodekey, &nodename);
		if (error)
			break;

		sprintf(key, "//cluster/clusternodes/clusternode[@name=\"%s\"]/clvm", nodename);
		if (!ccs_get(ctree, key, &clvmflagstr))
		{
			clvmflag = atoi(clvmflagstr);
			free(clvmflagstr);
		}

		DEBUGLOG("Got node %s from ccs (clvmflag = %d)\n", nodename, clvmflag);
		if ((get_ip_address(nodename, nodeip) == 0) && clvmflag)
		{
			struct node_info *ninfo;

			/* If it's not in the list, then add it */
			ninfo = dm_hash_lookup_binary(node_hash, nodeip, GULM_MAX_CSID_LEN);
			if (!ninfo)
			{
				ninfo = malloc(sizeof(struct node_info));
				if (!ninfo)
				{
					syslog(LOG_ERR, "Cannot alloc memory for node info\n");
					ccs_disconnect(ctree);
					return -1;
				}
				strcpy(ninfo->name, nodename);

				ninfo->state = NODE_DOWN;
				dm_hash_insert_binary(node_hash, nodeip, GULM_MAX_CSID_LEN, ninfo);
			}
		}
		else
		{
			if (!clvmflag) {
				DEBUGLOG("node %s has clvm disabled\n", nodename);
			}
			else {
				DEBUGLOG("Cannot resolve host name %s\n", nodename);
				log_err("Cannot resolve host name %s\n", nodename);
			}
		}
		free(nodename);
	}

	/* Finished with config file */
	ccs_disconnect(ctree);

	return 0;
}
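
/*
 * Illustrative cluster.conf fragment matched by the CCS queries in
 * get_all_cluster_nodes() and _init_cluster() above (layout assumed from
 * the XPath strings; the values are examples only):
 *
 *   <cluster name="mycluster">
 *     <clvm port="21064"/>
 *     <clusternodes>
 *       <clusternode name="node1"><clvm>1</clvm></clusternode>
 *       <clusternode name="node2"><clvm>0</clvm></clusternode>
 *     </clusternodes>
 *   </cluster>
 */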

static int _get_main_cluster_fd(void)
{
	return get_main_gulm_cluster_fd();
}

static int _cluster_fd_callback(struct local_client *fd, char *buf, int len, const char *csid, struct local_client **new_client)
{
	return cluster_fd_gulm_callback(fd, buf, len, csid, new_client);
}

static int _cluster_send_message(const void *buf, int msglen, const char *csid, const char *errtext)
{
	return gulm_cluster_send_message((char *)buf, msglen, csid, errtext);
}

static int _get_cluster_name(char *buf, int buflen)
{
	strncpy(buf, cluster_name, buflen);
	buf[buflen - 1] = '\0'; /* strncpy does not guarantee termination */
	return 0;
}

static struct cluster_ops _cluster_gulm_ops = {
	.cluster_init_completed   = NULL,
	.cluster_send_message     = _cluster_send_message,
	.name_from_csid           = gulm_name_from_csid,
	.csid_from_name           = _csid_from_name,
	.get_num_nodes            = _get_num_nodes,
	.cluster_fd_callback      = _cluster_fd_callback,
	.get_main_cluster_fd      = _get_main_cluster_fd,
	.cluster_do_node_callback = _cluster_do_node_callback,
	.is_quorate               = _is_quorate,
	.get_our_csid             = _get_our_csid,
	.add_up_node              = gulm_add_up_node,
	.reread_config            = _reread_config,
	.cluster_closedown        = _cluster_closedown,
	.get_cluster_name         = _get_cluster_name,
	.sync_lock                = _sync_lock,
	.sync_unlock              = _sync_unlock,
};

struct cluster_ops *init_gulm_cluster(void)
{
	if (!_init_cluster())
		return &_cluster_gulm_ops;
	else
		return NULL;
}