mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-21 13:34:40 +03:00
Make clvmd work around some "limitations" in gulm's node state notifications.
Also make clvmd debuglog timestamps a little more helpful.
This commit is contained in:
parent
072893aabd
commit
2b7e4013cd
@ -7,6 +7,7 @@ Version 2.01.10 -
|
||||
Scan ramdisks too and allow non-O_DIRECT fallback.
|
||||
Annotate, tidy and extend list.h.
|
||||
Alignment tidying.
|
||||
Make clvmd work around some "bugs" in gulm's node state notifications.
|
||||
|
||||
Version 2.01.09 - 4th April 2005
|
||||
================================
|
||||
|
@ -60,8 +60,9 @@ static struct hash_table *node_hash;
|
||||
/* hash list of outstanding lock requests */
|
||||
static struct hash_table *lock_hash;
|
||||
|
||||
/* Copy of the current core state */
|
||||
static uint8_t current_corestate;
|
||||
/* Copy of the current quorate state */
|
||||
static uint8_t gulm_quorate = 0;
|
||||
static enum {INIT_NOTDONE, INIT_DONE, INIT_WAITQUORATE} init_state = INIT_NOTDONE;
|
||||
|
||||
/* Number of active nodes */
|
||||
static int num_nodes;
|
||||
@ -312,12 +313,16 @@ static int core_login_reply(void *misc, uint64_t gen, uint32_t error, uint32_t r
|
||||
if (error)
|
||||
exit(error);
|
||||
|
||||
current_corestate = corestate;
|
||||
/* Get the current core state (for quorum) */
|
||||
lg_core_corestate(gulm_if);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestate)
|
||||
{
|
||||
int oldstate = ninfo->state;
|
||||
|
||||
if (nodestate == lg_core_Logged_in)
|
||||
{
|
||||
/* Don't clobber NODE_CLVMD state */
|
||||
@ -339,11 +344,17 @@ static void set_node_state(struct node_info *ninfo, char *csid, uint8_t nodestat
|
||||
if (ninfo->state != NODE_DOWN)
|
||||
num_nodes--;
|
||||
ninfo->state = NODE_DOWN;
|
||||
tcp_remove_client(csid);
|
||||
}
|
||||
}
|
||||
DEBUGLOG("set_node_state, '%s' state = %d, num_nodes=%d\n",
|
||||
ninfo->name, ninfo->state, num_nodes);
|
||||
/* Gulm doesn't always send node DOWN events, so even if this a a node UP we must
|
||||
* assume (ahem) that it prevously went down at some time. So we close
|
||||
* the sockets here to make sure that we don't have any dead connections
|
||||
* to that node.
|
||||
*/
|
||||
tcp_remove_client(csid);
|
||||
|
||||
DEBUGLOG("set_node_state, '%s' state = %d (oldstate=%d), num_nodes=%d\n",
|
||||
ninfo->name, ninfo->state, oldstate, num_nodes);
|
||||
}
|
||||
|
||||
static struct node_info *add_or_set_node(char *name, struct in6_addr *ip, uint8_t state)
|
||||
@ -400,7 +411,16 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
|
||||
char ourcsid[GULM_MAX_CSID_LEN];
|
||||
|
||||
DEBUGLOG("Got Nodelist, stop\n");
|
||||
clvmd_cluster_init_completed();
|
||||
if (gulm_quorate)
|
||||
{
|
||||
clvmd_cluster_init_completed();
|
||||
init_state = INIT_DONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (init_state == INIT_NOTDONE)
|
||||
init_state = INIT_WAITQUORATE;
|
||||
}
|
||||
|
||||
/* Mark ourself as up */
|
||||
_get_our_csid(ourcsid);
|
||||
@ -418,10 +438,15 @@ static int core_nodelist(void *misc, lglcb_t type, char *name, struct in6_addr *
|
||||
|
||||
static int core_statechange(void *misc, uint8_t corestate, uint8_t quorate, struct in6_addr *masterip, char *mastername)
|
||||
{
|
||||
DEBUGLOG("CORE Got statechange corestate:%#x mastername:%s\n",
|
||||
corestate, mastername);
|
||||
DEBUGLOG("CORE Got statechange. quorate:%d, corestate:%x mastername:%s\n",
|
||||
quorate, corestate, mastername);
|
||||
|
||||
current_corestate = corestate;
|
||||
gulm_quorate = quorate;
|
||||
if (quorate && init_state == INIT_WAITQUORATE)
|
||||
{
|
||||
clvmd_cluster_init_completed();
|
||||
init_state = INIT_DONE;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -474,7 +499,7 @@ static int lock_login_reply(void *misc, uint32_t error, uint8_t which)
|
||||
lock_start_flag = 0;
|
||||
pthread_mutex_unlock(&lock_start_mutex);
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -615,7 +640,11 @@ void gulm_add_up_node(char *csid)
|
||||
}
|
||||
|
||||
DEBUGLOG("gulm_add_up_node %s\n", ninfo->name);
|
||||
|
||||
if (ninfo->state == NODE_DOWN)
|
||||
num_nodes++;
|
||||
ninfo->state = NODE_CLVMD;
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
@ -853,12 +882,7 @@ static int _sync_unlock(const char *resource, int lockid)
|
||||
|
||||
static int _is_quorate()
|
||||
{
|
||||
if (current_corestate == lg_core_Slave ||
|
||||
current_corestate == lg_core_Master ||
|
||||
current_corestate == lg_core_Client)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
return gulm_quorate;
|
||||
}
|
||||
|
||||
/* Get all the cluster node names & IPs from CCS and
|
||||
|
@ -280,6 +280,7 @@ int main(int argc, char *argv[])
|
||||
child_init_signal(DFAIL_MALLOC);
|
||||
|
||||
newfd->fd = local_sock;
|
||||
newfd->removeme = 0;
|
||||
newfd->type = LOCAL_RENDEZVOUS;
|
||||
newfd->callback = local_rendezvous_callback;
|
||||
newfd->next = local_client_head.next;
|
||||
@ -346,6 +347,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
|
||||
newfd->fd = client_fd;
|
||||
newfd->type = LOCAL_SOCK;
|
||||
newfd->xid = 0;
|
||||
newfd->removeme = 0;
|
||||
newfd->callback = local_sock_callback;
|
||||
newfd->bits.localsock.replies = NULL;
|
||||
newfd->bits.localsock.expected_replies = 0;
|
||||
@ -519,6 +521,20 @@ static void main_loop(int local_sock, int cmd_timeout)
|
||||
|
||||
for (thisfd = &local_client_head; thisfd != NULL;
|
||||
thisfd = thisfd->next) {
|
||||
|
||||
if (thisfd->removeme) {
|
||||
struct local_client *free_fd;
|
||||
lastfd->next = thisfd->next;
|
||||
free_fd = thisfd;
|
||||
thisfd = lastfd;
|
||||
|
||||
DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
|
||||
|
||||
/* Queue cleanup, this also frees the client struct */
|
||||
add_to_lvmqueue(free_fd, NULL, 0, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
if (FD_ISSET(thisfd->fd, &in)) {
|
||||
struct local_client *newfd;
|
||||
int ret;
|
||||
@ -905,6 +921,7 @@ static int read_from_local_sock(struct local_client *thisfd)
|
||||
DEBUGLOG("creating pipe, [%d, %d]\n", comms_pipe[0],
|
||||
comms_pipe[1]);
|
||||
newfd->fd = comms_pipe[0];
|
||||
newfd->removeme = 0;
|
||||
newfd->type = THREAD_PIPE;
|
||||
newfd->callback = local_pipe_callback;
|
||||
newfd->next = thisfd->next;
|
||||
@ -1061,8 +1078,8 @@ void process_remote_command(struct clvm_header *msg, int msglen, int fd,
|
||||
/* Get the node name as we /may/ need it later */
|
||||
clops->name_from_csid(csid, nodename);
|
||||
|
||||
DEBUGLOG("process_remote_command %d for clientid 0x%x on node %s\n",
|
||||
msg->cmd, msg->clientid, nodename);
|
||||
DEBUGLOG("process_remote_command %d for clientid 0x%x XID %d on node %s\n",
|
||||
msg->cmd, msg->clientid, msg->xid, nodename);
|
||||
|
||||
/* Is the data to be found in the system LV ? */
|
||||
if (msg->flags & CLVMD_FLAG_SYSTEMLV) {
|
||||
@ -1575,9 +1592,10 @@ static int send_message(void *buf, int msglen, char *csid, int fd,
|
||||
|
||||
static int process_work_item(struct lvm_thread_cmd *cmd)
|
||||
{
|
||||
|
||||
/* If msg is NULL then this is a cleanup request */
|
||||
if (cmd->msg == NULL) {
|
||||
DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
|
||||
close(cmd->client->fd);
|
||||
cmd_client_cleanup(cmd->client);
|
||||
free(cmd->client);
|
||||
return 0;
|
||||
@ -1638,7 +1656,8 @@ static void *lvm_thread_fn(void *arg)
|
||||
pthread_mutex_unlock(&lvm_thread_mutex);
|
||||
|
||||
process_work_item(cmd);
|
||||
free(cmd->msg);
|
||||
if (cmd->msg)
|
||||
free(cmd->msg);
|
||||
free(cmd);
|
||||
|
||||
pthread_mutex_lock(&lvm_thread_mutex);
|
||||
|
@ -86,6 +86,7 @@ struct local_client {
|
||||
struct local_client *next;
|
||||
unsigned short xid;
|
||||
fd_callback_t callback;
|
||||
uint8_t removeme;
|
||||
|
||||
union {
|
||||
struct localsock_bits localsock;
|
||||
@ -95,7 +96,7 @@ struct local_client {
|
||||
};
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DEBUGLOG(fmt, args...) fprintf(stderr, "CLVMD[%x]: %ld ", (int)pthread_self(), time(NULL) ); fprintf(stderr, fmt, ## args)
|
||||
#define DEBUGLOG(fmt, args...) {time_t P; time(&P); fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime(&P)+4 ); fprintf(stderr, fmt, ## args);}
|
||||
#else
|
||||
#define DEBUGLOG(fmt, args...)
|
||||
#endif
|
||||
|
@ -105,6 +105,7 @@ void tcp_remove_client(char *csid)
|
||||
if (client)
|
||||
{
|
||||
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
|
||||
client->removeme = 1;
|
||||
}
|
||||
|
||||
/* Look for a mangled one too */
|
||||
@ -114,6 +115,7 @@ void tcp_remove_client(char *csid)
|
||||
if (client)
|
||||
{
|
||||
hash_remove_binary(sock_hash, csid, GULM_MAX_CSID_LEN);
|
||||
client->removeme = 1;
|
||||
}
|
||||
|
||||
/* Put it back as we found it */
|
||||
|
Loading…
Reference in New Issue
Block a user