1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-27 14:04:05 +03:00

tools/ctdb: Factor, simplify and improve robustness of ipreallocate code

Having other functions call control_ipreallocate() suggests that
it might look at the argc/argv arguments that are passed.  This is not
the case.  Change the callers so they call the new ipreallocate()
function instead.

Broadcast CTDB_SRVID_TAKEOVER_RUN to all connected nodes.  Inactive
nodes will ignore it.  This is safe since we only want 1 reply.  If we
didn't get a response, we don't actually care if there's no active
recovery master - just fire, wait, retry, ...

Ignore some failures on the basis that they might be transient, so it
is probably worth retrying.

Signed-off-by: Martin Schwenke <martin@meltin.net>

(This used to be ctdb commit 4bf0b1c9d21986eecb7682f935bd6154c65533cc)
This commit is contained in:
Martin Schwenke 2013-08-09 15:41:37 +10:00
parent db121b4c8f
commit f72f4c362b

View File

@ -1973,21 +1973,17 @@ static void ctdb_every_second(struct event_context *ev, struct timed_event *te,
ctdb_every_second, ctdb);
}
/*
ask the recovery daemon on the recovery master to perform a ip reallocation
/* Send an ipreallocate to the recovery daemon on all nodes. Only the
* recovery master will answer.
*/
static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
static int ipreallocate(struct ctdb_context *ctdb)
{
int i, ret;
int ret;
TDB_DATA data;
struct takeover_run_reply rd;
struct ctdb_node_map *nodemap=NULL;
int count;
struct timeval tv = timeval_current();
struct timeval tv;
/* we need some events to trigger so we can timeout and restart
the loop
*/
/* Time ticks to enable timeouts to be processed */
event_add_timed(ctdb->ev, ctdb,
timeval_current_ofs(1, 0),
ctdb_every_second, ctdb);
@ -1995,49 +1991,35 @@ static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char
rd.pnn = ctdb_get_pnn(ctdb);
rd.srvid = getpid();
/* register a message port for receiveing the reply so that we
can receive the reply
*/
/* Register message port for reply from recovery master */
ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, NULL);
data.dptr = (uint8_t *)&rd;
data.dsize = sizeof(rd);
again:
/* get the number of nodes and node flags */
if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap) != 0) {
DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
sleep(1);
goto again;
}
ipreallocate_finished = false;
count = 0;
for (i=0; i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
continue;
} else {
/* Send to all active nodes. Only recmaster will reply. */
ret = ctdb_client_send_message(ctdb, i, CTDB_SRVID_TAKEOVER_RUN, data);
/* Send to all connected nodes. Only recmaster replies */
ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
CTDB_SRVID_TAKEOVER_RUN, data);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send ip takeover run request message to %u\n", options.pnn));
/* This can only happen if the socket is closed and
* there's no way to recover from that, so don't try
* again.
*/
DEBUG(DEBUG_WARNING,
("Failed to send IP reallocation request to connected nodes\n"));
return -1;
}
count++;
}
}
if (count == 0) {
DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for cluster convergence\n"));
return 0;
}
tv = timeval_current();
/* this loop will terminate when we have received the reply */
/* This loop terminates when the reply is received */
while (timeval_elapsed(&tv) < 5.0 && !ipreallocate_finished) {
event_loop_once(ctdb->ev);
}
if (!ipreallocate_finished) {
DEBUG(DEBUG_NOTICE,
("Still waiting for confirmation of IP reallocation\n"));
goto again;
}
@ -2045,6 +2027,11 @@ again:
}
/*
  Wrapper around ipreallocate() that carries the (ctdb, argc, argv)
  signature.  argc and argv are not examined here; the result of
  ipreallocate() is returned unchanged.
 */
static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int status = ipreallocate(ctdb);

	return status;
}
/*
add a public ip address to a node
*/
@ -2937,7 +2924,7 @@ static int control_disable(struct ctdb_context *ctdb, int argc, const char **arg
}
} while (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED));
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
@ -2983,7 +2970,7 @@ static int control_enable(struct ctdb_context *ctdb, int argc, const char **argv
} while (nodemap->nodes[options.pnn].flags & NODE_FLAGS_PERMANENTLY_DISABLED);
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
@ -3014,7 +3001,7 @@ static int control_stop(struct ctdb_context *ctdb, int argc, const char **argv)
}
} while (nodemap == NULL || !(nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED));
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
@ -3047,7 +3034,7 @@ static int control_continue(struct ctdb_context *ctdb, int argc, const char **ar
}
} while (nodemap == NULL || nodemap->nodes[options.pnn].flags & NODE_FLAGS_STOPPED);
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
@ -3131,7 +3118,7 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;
@ -3171,7 +3158,7 @@ static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
ret = control_ipreallocate(ctdb, argc, argv);
ret = ipreallocate(ctdb);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", options.pnn));
return ret;