1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00

Make clvmd work around some "limitations" in gulm's node state notifications.

Also make clvmd debuglog timestamps a little more helpful.
This commit is contained in:
Patrick Caulfield 2005-04-13 13:50:07 +00:00
parent 072893aabd
commit 2b7e4013cd
5 changed files with 69 additions and 22 deletions

View File

@ -7,6 +7,7 @@ Version 2.01.10 -
Scan ramdisks too and allow non-O_DIRECT fallback.
Annotate, tidy and extend list.h.
Alignment tidying.
Make clvmd work around some "bugs" in gulm's node state notifications.
Version 2.01.09 - 4th April 2005
================================

View File

@ -60,8 +60,9 @@ static struct hash_table *node_hash;
/* hash list of outstanding lock requests */
static struct hash_table *lock_hash;
/* Copy of the current core state */
static uint8_t current_corestate;
/* Copy of the current quorate state */
static uint8_t gulm_quorate = 0;
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;
/* Number of active nodes */
static int num_nodes;
@ -312,12 +313,16 @@ static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t r
if (error)
exit(error);
current_corestate = corestate;
/* Get the current core state (for quorum) */
lg_core_corestate(gulm_if);
return 0;
}
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
{
int oldstate = ninfo->state;
if (nodestate == lg_core_Logged_in)
{
/* Don't clobber NODE_CLVMD state */
@ -339,11 +344,17 @@ static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestat
if (ninfo->state != NODE_DOWN)
num_nodes--;
ninfo->state = NODE_DOWN;
tcp_remove_client(csid);
}
}
DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n",
ninfo->name, ninfo->state, num_nodes);
/* Gulm doesn't always send node DOWN events, so even if this a a node UP we must
* assume (ahem) that it prevously went down at some time. So we close
* the sockets here to make sure that we don't have any dead connections
* to that node.
*/
tcp_remove_client(csid);
DEBUGLOG("set_node_state, '%s' state = %d (oldstate=%d), num_nodes=%d\n",
ninfo->name, ninfo->state, oldstate, num_nodes);
}
static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
@ -400,7 +411,16 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
char ourcsid[GULM_MAX_CSID_LEN];
DEBUGLOG("Got Nodelist, stop\n");
clvmd_cluster_init_completed();
if (gulm_quorate)
{
clvmd_cluster_init_completed();
init_state = INIT_DONE;
}
else
{
if (init_state == INIT_NOTDONE)
init_state = INIT_WAITQUORATE;
}
/* Mark ourself as up */
_get_our_csid(ourcsid);
@ -418,10 +438,15 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
{
DEBUGLOG("CORE Got statechange corestate:%#x mastername:%s\n",
corestate, mastername);
DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
quorate, corestate, mastername);
current_corestate = corestate;
gulm_quorate = quorate;
if (quorate && init_state == INIT_WAITQUORATE)
{
clvmd_cluster_init_completed();
init_state = INIT_DONE;
}
return 0;
}
@ -474,7 +499,7 @@ static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
lock_start_flag = 0;
pthread_mutex_unlock(&lock_start_mutex);
}
return 0;
}
@ -615,7 +640,11 @@ void gulm_add_up_node(char *csid)
}
DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);
if (ninfo->state == NODE_DOWN)
num_nodes++;
ninfo->state = NODE_CLVMD;
return;
}
@ -853,12 +882,7 @@ static int _sync_unlock(const char *resource, int lockid)
static int _is_quorate()
{
if (current_corestate == lg_core_Slave ||
current_corestate == lg_core_Master ||
current_corestate == lg_core_Client)
return 1;
else
return 0;
return gulm_quorate;
}
/* Get all the cluster node names & IPs from CCS and

View File

@ -280,6 +280,7 @@ int main(int argc, char *argv[])
child_init_signal(DFAIL_MALLOC);
newfd->fd = local_sock;
newfd->removeme = 0;
newfd->type = LOCAL_RENDEZVOUS;
newfd->callback = local_rendezvous_callback;
newfd->next = local_client_head.next;
@ -346,6 +347,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
newfd->fd = client_fd;
newfd->type = LOCAL_SOCK;
newfd->xid = 0;
newfd->removeme = 0;
newfd->callback = local_sock_callback;
newfd->bits.localsock.replies = NULL;
newfd->bits.localsock.expected_replies = 0;
@ -519,6 +521,20 @@ static void main_loop(int local_sock, int cmd_timeout)
for (thisfd = &local_client_head; thisfd != NULL;
thisfd = thisfd->next) {
if (thisfd->removeme) {
struct local_client *free_fd;
lastfd->next = thisfd->next;
free_fd = thisfd;
thisfd = lastfd;
DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
/* Queue cleanup, this also frees the client struct */
add_to_lvmqueue(free_fd, NULL, 0, NULL);
break;
}
if (FD_ISSET(thisfd->fd, &in)) {
struct local_client *newfd;
int ret;
@ -905,6 +921,7 @@ static int read_from_local_sock(struct local_client *thisfd)
DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0],
comms_pipe[1]);
newfd->fd = comms_pipe[0];
newfd->removeme = 0;
newfd->type = THREAD_PIPE;
newfd->callback = local_pipe_callback;
newfd->next = thisfd->next;
@ -1061,8 +1078,8 @@ void process_remote_command(struct clvm_header *msg, int msglen, int fd,
/* Get the node name as we /may/ need it later */
clops->name_from_csid(csid, nodename);
DEBUGLOG("process_remote_command %d for clientid 0x%x on node %s\n",
msg->cmd, msg->clientid, nodename);
DEBUGLOG("process_remote_command %d for clientid 0x%x XID %d on node %s\n",
msg->cmd, msg->clientid, msg->xid, nodename);
/* Is the data to be found in the system LV ? */
if (msg->flags & CLVMD_FLAG_SYSTEMLV) {
@ -1575,9 +1592,10 @@ static int send_message(void *buf, int msglen, char *csid, int fd,
static int process_work_item(struct lvm_thread_cmd *cmd)
{
/* If msg is NULL then this is a cleanup request */
if (cmd->msg == NULL) {
DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
close(cmd->client->fd);
cmd_client_cleanup(cmd->client);
free(cmd->client);
return 0;
@ -1638,7 +1656,8 @@ static void *lvm_thread_fn(void *arg)
pthread_mutex_unlock(&lvm_thread_mutex);
process_work_item(cmd);
free(cmd->msg);
if (cmd->msg)
free(cmd->msg);
free(cmd);
pthread_mutex_lock(&lvm_thread_mutex);

View File

@ -86,6 +86,7 @@ struct local_client {
struct local_client *next;
unsigned short xid;
fd_callback_t callback;
uint8_t removeme;
union {
struct localsock_bits localsock;
@ -95,7 +96,7 @@ struct local_client {
};
#ifdef DEBUG
#define DEBUGLOG(fmt, args...) fprintf(stderr, "CLVMD[%x]: %ld ", (int)pthread_self(), time(NULL) ); fprintf(stderr, fmt, ## args)
#define DEBUGLOG(fmt, args...) {time_t P; time(&P); fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime(&P)+4 ); fprintf(stderr, fmt, ## args);}
#else
#define DEBUGLOG(fmt, args...)
#endif

View File

@ -105,6 +105,7 @@ void tcp_remove_client(char *csid)
if (client)
{
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
client->removeme = 1;
}
/* Look for a mangled one too */
@ -114,6 +115,7 @@ void tcp_remove_client(char *csid)
if (client)
{
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
client->removeme = 1;
}
/* Put it back as we found it */