1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-03 05:18:29 +03:00

Make clvmd work around some "limitations" in gulm's node state notifications.

Also make clvmd debuglog timestamps a little more helpful.
This commit is contained in:
Patrick Caulfield 2005-04-13 13:50:07 +00:00
parent 072893aabd
commit 2b7e4013cd
5 changed files with 69 additions and 22 deletions

View File

@ -7,6 +7,7 @@ Version 2.01.10 -
Scan ramdisks too and allow non-O_DIRECT fallback. Scan ramdisks too and allow non-O_DIRECT fallback.
Annotate, tidy and extend list.h. Annotate, tidy and extend list.h.
Alignment tidying. Alignment tidying.
Make clvmd work around some "bugs" in gulm's node state notifications.
Version 2.01.09 - 4th April 2005 Version 2.01.09 - 4th April 2005
================================ ================================

View File

@ -60,8 +60,9 @@ static struct hash_table *node_hash;
/* hash list of outstanding lock requests */ /* hash list of outstanding lock requests */
static struct hash_table *lock_hash; static struct hash_table *lock_hash;
/* Copy of the current core state */ /* Copy of the current quorate state */
static uint8_t current_corestate; static uint8_t gulm_quorate = 0;
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;
/* Number of active nodes */ /* Number of active nodes */
static int num_nodes; static int num_nodes;
@ -312,12 +313,16 @@ static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t r
if (error) if (error)
exit(error); exit(error);
current_corestate = corestate; /* Get the current core state (for quorum) */
lg_core_corestate(gulm_if);
return 0; return 0;
} }
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate) static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{ {
int oldstate = ninfo->state;
if (nodestate == lg_core_Logged_in) if (nodestate == lg_core_Logged_in)
{ {
/* Don't clobber NODE_CLVMD state */ /* Don't clobber NODE_CLVMD state */
@ -339,11 +344,17 @@ static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestat
if (ninfo->state != NODE_DOWN) if (ninfo->state != NODE_DOWN)
num_nodes--; num_nodes--;
ninfo->state = NODE_DOWN; ninfo->state = NODE_DOWN;
}
}
/* Gulm doesn't always send node DOWN events, so even if this is a node UP we must
* assume (ahem) that it previously went down at some time. So we close
* the sockets here to make sure that we don't have any dead connections
* to that node.
*/
tcp_remove_client(csid); tcp_remove_client(csid);
}
} DEBUGLOG("set_node_state, '%s' state = %d (oldstate=%d), num_nodes=%d\n",
DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n", ninfo->name, ninfo->state, oldstate, num_nodes);
ninfo->name, ninfo->state, num_nodes);
} }
static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state) static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
@ -400,7 +411,16 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
char ourcsid[GULM_MAX_CSID_LEN]; char ourcsid[GULM_MAX_CSID_LEN];
DEBUGLOG("Got Nodelist, stop\n"); DEBUGLOG("Got Nodelist, stop\n");
if (gulm_quorate)
{
clvmd_cluster_init_completed(); clvmd_cluster_init_completed();
init_state = INIT_DONE;
}
else
{
if (init_state == INIT_NOTDONE)
init_state = INIT_WAITQUORATE;
}
/* Mark ourself as up */ /* Mark ourself as up */
_get_our_csid(ourcsid); _get_our_csid(ourcsid);
@ -418,10 +438,15 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername) static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{ {
DEBUGLOG("CORE Got statechange corestate:%#x mastername:%s\n", DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
corestate, mastername); quorate, corestate, mastername);
current_corestate = corestate; gulm_quorate = quorate;
if (quorate && init_state == INIT_WAITQUORATE)
{
clvmd_cluster_init_completed();
init_state = INIT_DONE;
}
return 0; return 0;
} }
@ -615,7 +640,11 @@ void gulm_add_up_node(char *csid)
} }
DEBUGLOG("gulm_add_up_node %s\n", ninfo->name); DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);
if (ninfo->state == NODE_DOWN)
num_nodes++;
ninfo->state = NODE_CLVMD; ninfo->state = NODE_CLVMD;
return; return;
} }
@ -853,12 +882,7 @@ static int _sync_unlock(const char *resource, int lockid)
static int _is_quorate() static int _is_quorate()
{ {
if (current_corestate == lg_core_Slave || return gulm_quorate;
current_corestate == lg_core_Master ||
current_corestate == lg_core_Client)
return 1;
else
return 0;
} }
/* Get all the cluster node names & IPs from CCS and /* Get all the cluster node names & IPs from CCS and

View File

@ -280,6 +280,7 @@ int main(int argc, char *argv[])
child_init_signal(DFAIL_MALLOC); child_init_signal(DFAIL_MALLOC);
newfd->fd = local_sock; newfd->fd = local_sock;
newfd->removeme = 0;
newfd->type = LOCAL_RENDEZVOUS; newfd->type = LOCAL_RENDEZVOUS;
newfd->callback = local_rendezvous_callback; newfd->callback = local_rendezvous_callback;
newfd->next = local_client_head.next; newfd->next = local_client_head.next;
@ -346,6 +347,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
newfd->fd = client_fd; newfd->fd = client_fd;
newfd->type = LOCAL_SOCK; newfd->type = LOCAL_SOCK;
newfd->xid = 0; newfd->xid = 0;
newfd->removeme = 0;
newfd->callback = local_sock_callback; newfd->callback = local_sock_callback;
newfd->bits.localsock.replies = NULL; newfd->bits.localsock.replies = NULL;
newfd->bits.localsock.expected_replies = 0; newfd->bits.localsock.expected_replies = 0;
@ -519,6 +521,20 @@ static void main_loop(int local_sock, int cmd_timeout)
for (thisfd = &local_client_head; thisfd != NULL; for (thisfd = &local_client_head; thisfd != NULL;
thisfd = thisfd->next) { thisfd = thisfd->next) {
if (thisfd->removeme) {
struct local_client *free_fd;
lastfd->next = thisfd->next;
free_fd = thisfd;
thisfd = lastfd;
DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
/* Queue cleanup, this also frees the client struct */
add_to_lvmqueue(free_fd, NULL, 0, NULL);
break;
}
if (FD_ISSET(thisfd->fd, &in)) { if (FD_ISSET(thisfd->fd, &in)) {
struct local_client *newfd; struct local_client *newfd;
int ret; int ret;
@ -905,6 +921,7 @@ static int read_from_local_sock(struct local_client *thisfd)
DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0], DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0],
comms_pipe[1]); comms_pipe[1]);
newfd->fd = comms_pipe[0]; newfd->fd = comms_pipe[0];
newfd->removeme = 0;
newfd->type = THREAD_PIPE; newfd->type = THREAD_PIPE;
newfd->callback = local_pipe_callback; newfd->callback = local_pipe_callback;
newfd->next = thisfd->next; newfd->next = thisfd->next;
@ -1061,8 +1078,8 @@ void process_remote_command(struct clvm_header *msg, int msglen, int fd,
/* Get the node name as we /may/ need it later */ /* Get the node name as we /may/ need it later */
clops->name_from_csid(csid, nodename); clops->name_from_csid(csid, nodename);
DEBUGLOG("process_remote_command %d for clientid 0x%x on node %s\n", DEBUGLOG("process_remote_command %d for clientid 0x%x XID %d on node %s\n",
msg->cmd, msg->clientid, nodename); msg->cmd, msg->clientid, msg->xid, nodename);
/* Is the data to be found in the system LV ? */ /* Is the data to be found in the system LV ? */
if (msg->flags & CLVMD_FLAG_SYSTEMLV) { if (msg->flags & CLVMD_FLAG_SYSTEMLV) {
@ -1575,9 +1592,10 @@ static int send_message(void *buf, int msglen, char *csid, int fd,
static int process_work_item(struct lvm_thread_cmd *cmd) static int process_work_item(struct lvm_thread_cmd *cmd)
{ {
/* If msg is NULL then this is a cleanup request */ /* If msg is NULL then this is a cleanup request */
if (cmd->msg == NULL) { if (cmd->msg == NULL) {
DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
close(cmd->client->fd);
cmd_client_cleanup(cmd->client); cmd_client_cleanup(cmd->client);
free(cmd->client); free(cmd->client);
return 0; return 0;
@ -1638,6 +1656,7 @@ static void *lvm_thread_fn(void *arg)
pthread_mutex_unlock(&lvm_thread_mutex); pthread_mutex_unlock(&lvm_thread_mutex);
process_work_item(cmd); process_work_item(cmd);
if (cmd->msg)
free(cmd->msg); free(cmd->msg);
free(cmd); free(cmd);

View File

@ -86,6 +86,7 @@ struct local_client {
struct local_client *next; struct local_client *next;
unsigned short xid; unsigned short xid;
fd_callback_t callback; fd_callback_t callback;
uint8_t removeme;
union { union {
struct localsock_bits localsock; struct localsock_bits localsock;
@ -95,7 +96,7 @@ struct local_client {
}; };
#ifdef DEBUG #ifdef DEBUG
#define DEBUGLOG(fmt, args...) fprintf(stderr, "CLVMD[%x]: %ld ", (int)pthread_self(), time(NULL) ); fprintf(stderr, fmt, ## args) #define DEBUGLOG(fmt, args...) {time_t P; time(&P); fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime(&P)+4 ); fprintf(stderr, fmt, ## args);}
#else #else
#define DEBUGLOG(fmt, args...) #define DEBUGLOG(fmt, args...)
#endif #endif

View File

@ -105,6 +105,7 @@ void tcp_remove_client(char *csid)
if (client) if (client)
{ {
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN); hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
client->removeme = 1;
} }
/* Look for a mangled one too */ /* Look for a mangled one too */
@ -114,6 +115,7 @@ void tcp_remove_client(char *csid)
if (client) if (client)
{ {
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN); hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
client->removeme = 1;
} }
/* Put it back as we found it */ /* Put it back as we found it */