1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-09 09:57:48 +03:00
samba-mirror/ctdb/server/ctdb_takeover_helper.c
Martin Schwenke fd1dc9e0c2 ctdb-takeover: Fetch public IP addresses from all connected nodes
Redundant releases will be sent to all connected nodes anyway, so this
is no worse.  This will facilitate an improvement to avoid sending
releases to nodes with no known IPs.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Reviewed-by: Amitay Isaacs <amitay@gmail.com>
2017-02-24 07:47:12 +01:00

1263 lines
30 KiB
C

/*
CTDB IP takeover helper
Copyright (C) Martin Schwenke 2016
Based on ctdb_recovery_helper.c
Copyright (C) Amitay Isaacs 2015
and ctdb_takeover.c
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
Copyright (C) Martin Schwenke 2011
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "replace.h"
#include "system/network.h"
#include "system/filesys.h"
#include <popt.h>
#include <talloc.h>
#include <tevent.h>
#include "lib/util/debug.h"
#include "lib/util/strv.h"
#include "lib/util/strv_util.h"
#include "lib/util/sys_rw.h"
#include "lib/util/time.h"
#include "lib/util/tevent_unix.h"
#include "protocol/protocol.h"
#include "protocol/protocol_api.h"
#include "client/client.h"
#include "common/logging.h"
#include "server/ipalloc.h"
/* Timeout in seconds applied to individual controls.  Starts at 9 and
 * is overwritten with the takeover_timeout tunable once the tunables
 * have been fetched (see takeover_tunables_done()). */
static int takeover_timeout = 9;
/* Timeout value for a control: takeover_timeout seconds, computed via
 * timeval_current_ofs() at the point of use. */
#define TIMEOUT() timeval_current_ofs(takeover_timeout, 0)
/*
* Utility functions
*/
/*
 * Common receive helper for requests that produce no result data.
 *
 * Returns true if the request completed successfully.  Otherwise
 * returns false and, when perr is not NULL, stores the unix error
 * in *perr.  *perr is left untouched on success.
 */
static bool generic_recv(struct tevent_req *req, int *perr)
{
	int unix_err;

	if (! tevent_req_is_unix_error(req, &unix_err)) {
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}
	return false;
}
/*
 * Translate the ip_alloc_algorithm tunable into an ipalloc algorithm:
 * 0 is deterministic, 1 is non-deterministic, and 2 or any other
 * value selects LCP2.
 */
static enum ipalloc_algorithm
determine_algorithm(const struct ctdb_tunable_list *tunables)
{
	if (tunables->ip_alloc_algorithm == 0) {
		return IPALLOC_DETERMINISTIC;
	}

	if (tunables->ip_alloc_algorithm == 1) {
		return IPALLOC_NONDETERMINISTIC;
	}

	/* Explicit 2 and every unrecognised value */
	return IPALLOC_LCP2;
}
/**********************************************************************/
/* State for a GET_PUBLIC_IPS request sent to a set of nodes */
struct get_public_ips_state {
/* PNNs of the nodes being queried, and how many there are; the array
 * is supplied by the caller and only referenced here */
uint32_t *pnns;
int count;
/* Per-node results, indexed by PNN */
struct ctdb_public_ip_list *ips;
/* Per-node failure counters, indexed by PNN; supplied by the caller */
uint32_t *ban_credits;
};
/* Completion callback for the GET_PUBLIC_IPS multi-control */
static void get_public_ips_done(struct tevent_req *subreq);
/*
 * Send GET_PUBLIC_IPS to a set of nodes.
 *
 * pnns, count:    nodes to query; the array is referenced, not copied
 * num_nodes:      number of entries in the per-PNN result array
 * ban_credits:    per-PNN failure counters, incremented for each node
 *                 that fails the control
 * available_only: passed through to the GET_PUBLIC_IPS control
 *
 * Returns the new request, or NULL if it could not be created.  If
 * count is 0 the request completes immediately with an all-zero
 * result array.
 */
static struct tevent_req *get_public_ips_send(
				TALLOC_CTX *mem_ctx,
				struct tevent_context *ev,
				struct ctdb_client_context *client,
				uint32_t *pnns,
				int count, int num_nodes,
				uint32_t *ban_credits,
				bool available_only)
{
	struct tevent_req *req, *subreq;
	struct get_public_ips_state *state;
	struct ctdb_req_control request;

	req = tevent_req_create(mem_ctx, &state, struct get_public_ips_state);
	if (req == NULL) {
		return NULL;
	}

	state->pnns = pnns;
	state->count = count;
	state->ban_credits = ban_credits;

	state->ips  = talloc_zero_array(state,
					struct ctdb_public_ip_list,
					num_nodes);
	if (tevent_req_nomem(state->ips, req)) {
		return tevent_req_post(req, ev);
	}

	/* Short circuit if no nodes being asked for IPs */
	if (state->count == 0) {
		tevent_req_done(req);
		return tevent_req_post(req, ev);
	}

	ctdb_req_control_get_public_ips(&request, available_only);
	/* Parent the sub-request to our own state, like every other
	 * sender in this file, so that it cannot outlive this request
	 * and call back into freed memory. */
	subreq = ctdb_client_control_multi_send(state, ev, client,
						state->pnns,
						state->count,
						TIMEOUT(), &request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}
	tevent_req_set_callback(subreq, get_public_ips_done, req);

	return req;
}
/*
 * Completion callback for the GET_PUBLIC_IPS multi-control.
 *
 * If the multi-control failed, each node with a non-zero entry in the
 * error list is logged and given a banning credit, and the request
 * fails with the error returned by the multi-control.
 *
 * Otherwise each reply is decoded into state->ips[pnn].  Nodes whose
 * reply cannot be decoded are logged and given a banning credit, and
 * the request then fails with EIO.
 */
static void get_public_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct get_public_ips_state *state = tevent_req_data(
		req, struct get_public_ips_state);
	struct ctdb_reply_control **reply = NULL;
	int *err_list = NULL;
	int ret, i;
	bool status, found_errors;

	status = ctdb_client_control_multi_recv(subreq, &ret, state, &err_list,
						&reply);
	TALLOC_FREE(subreq);
	if (! status) {
		/* NOTE(review): err_list is presumably only NULL when
		 * the multi-control failed before per-node results
		 * were recorded - confirm against the client code.
		 * Guard anyway so that case cannot crash here. */
		for (i = 0; err_list != NULL && i < state->count; i++) {
			if (err_list[i] != 0) {
				uint32_t pnn = state->pnns[i];

				D_ERR("control GET_PUBLIC_IPS failed on "
				      "node %u, ret=%d\n", pnn, err_list[i]);

				state->ban_credits[pnn]++;
			}
		}

		tevent_req_error(req, ret);
		return;
	}

	found_errors = false;
	for (i = 0; i < state->count; i++) {
		uint32_t pnn;
		struct ctdb_public_ip_list *ips;

		pnn = state->pnns[i];
		ret = ctdb_reply_control_get_public_ips(reply[i], state->ips,
							&ips);
		if (ret != 0) {
			D_ERR("control GET_PUBLIC_IPS failed on "
			      "node %u\n", pnn);
			state->ban_credits[pnn]++;
			found_errors = true;
			continue;
		}

		D_INFO("Fetched public IPs from node %u\n", pnn);
		state->ips[pnn] = *ips;
	}

	/* The replies are no longer needed whether or not decoding
	 * succeeded.  Free them before completing the request, since
	 * completion may free state. */
	TALLOC_FREE(reply);

	if (found_errors) {
		tevent_req_error(req, EIO);
		return;
	}

	tevent_req_done(req);
}
/*
 * Collect the result of get_public_ips_send().
 *
 * On success returns true and moves the per-PNN result array onto
 * mem_ctx, storing it in *ips.  On failure returns false and, when
 * perr is not NULL, stores the unix error in *perr.
 */
static bool get_public_ips_recv(struct tevent_req *req, int *perr,
				TALLOC_CTX *mem_ctx,
				struct ctdb_public_ip_list **ips)
{
	struct get_public_ips_state *state = tevent_req_data(
		req, struct get_public_ips_state);
	int unix_err;

	if (! tevent_req_is_unix_error(req, &unix_err)) {
		*ips = talloc_steal(mem_ctx, state->ips);
		return true;
	}

	if (perr != NULL) {
		*perr = unix_err;
	}
	return false;
}
/**********************************************************************/
/* Aggregate state for the RELEASE_IP stage, shared by all of the
 * per-address sub-requests */
struct release_ip_state {
/* Number of sub-requests sent, completed and failed so far */
int num_sent;
int num_replies;
int num_fails;
/* Error reported for the stage if any sub-request failed; the most
 * recently recorded error wins */
int err_any;
/* Per-node failure counters, indexed by PNN; supplied by the caller */
uint32_t *ban_credits;
};
/* State for the RELEASE_IP controls sent for one IP address */
struct release_ip_one_state {
/* Parent request that this sub-request reports into */
struct tevent_req *req;
/* Nodes asked to release the address, and how many there are */
uint32_t *pnns;
int count;
/* Printable form of the address, used in log messages */
const char *ip_str;
};
/* Completion callback for one per-address RELEASE_IP multi-control */
static void release_ip_done(struct tevent_req *subreq);
/*
 * Send RELEASE_IP for every address in all_ips.
 *
 * For each address the control goes to every node in pnns[] other
 * than the one that should host it.  All but one of these are
 * redundant as far as actually releasing the address goes, but they
 * also tell each node which node should be hosting the address, so
 * that commands like "ctdb ip" can show a node's view of who hosts
 * what.
 *
 * timeout is an absolute, cumulative deadline shared by all controls.
 * ban_credits[] is incremented (in release_ip_done()) for nodes that
 * fail.  The request completes once every per-address sub-request has
 * completed, or immediately if nothing needed to be sent.
 */
static struct tevent_req *release_ip_send(TALLOC_CTX *mem_ctx,
					  struct tevent_context *ev,
					  struct ctdb_client_context *client,
					  uint32_t *pnns,
					  int count,
					  struct timeval timeout,
					  struct public_ip_list *all_ips,
					  uint32_t *ban_credits)
{
	struct tevent_req *req, *subreq;
	struct release_ip_state *state;
	struct public_ip_list *cur;

	req = tevent_req_create(mem_ctx, &state, struct release_ip_state);
	if (req == NULL) {
		return NULL;
	}

	state->ban_credits = ban_credits;
	state->num_fails = 0;
	state->num_replies = 0;
	state->num_sent = 0;

	for (cur = all_ips; cur != NULL; cur = cur->next) {
		struct release_ip_one_state *one;
		struct ctdb_req_control request;
		struct ctdb_public_ip ip;
		int i;

		one = talloc_zero(state, struct release_ip_one_state);
		if (tevent_req_nomem(one, req)) {
			return tevent_req_post(req, ev);
		}

		one->pnns = talloc_zero_array(one, uint32_t, count);
		if (tevent_req_nomem(one->pnns, req)) {
			return tevent_req_post(req, ev);
		}

		one->count = 0;
		one->req = req;

		one->ip_str = ctdb_sock_addr_to_string(one, &cur->addr);
		if (tevent_req_nomem(one->ip_str, req)) {
			return tevent_req_post(req, ev);
		}

		/* Collect every node that is not the intended host of
		 * this address: those are the ones that must release */
		for (i = 0; i < count; i++) {
			if (cur->pnn == pnns[i]) {
				continue;
			}
			one->pnns[one->count] = pnns[i];
			one->count++;
		}

		if (one->count == 0) {
			/* Nothing to release for this address */
			TALLOC_FREE(one);
			continue;
		}

		ip.pnn = cur->pnn;
		ip.addr = cur->addr;
		ctdb_req_control_release_ip(&request, &ip);

		subreq = ctdb_client_control_multi_send(state, ev, client,
							one->pnns,
							one->count,
							timeout, /* cumulative */
							&request);
		if (tevent_req_nomem(subreq, req)) {
			return tevent_req_post(req, ev);
		}
		tevent_req_set_callback(subreq, release_ip_done, one);

		state->num_sent++;
	}

	if (state->num_sent != 0) {
		return req;
	}

	/* Nothing was sent, so there is nothing to wait for */
	tevent_req_done(req);
	return tevent_req_post(req, ev);
}
/*
 * Completion callback for the RELEASE_IP controls of one address.
 *
 * Logs the outcome, assigns banning credits to the nodes that failed
 * and, once the last sub-request has reported in, completes the
 * parent request (with state->err_any if any sub-request failed).
 */
static void release_ip_done(struct tevent_req *subreq)
{
	struct release_ip_one_state *one = tevent_req_callback_data(
		subreq, struct release_ip_one_state);
	struct tevent_req *req = one->req;
	struct release_ip_state *state = tevent_req_data(
		req, struct release_ip_state);
	int *err_list;
	int ret;
	bool ok;

	ok = ctdb_client_control_multi_recv(subreq, &ret, state,
					    &err_list, NULL);
	TALLOC_FREE(subreq);

	if (ok) {
		D_INFO("RELEASE_IP %s succeeded on %d nodes\n",
		       one->ip_str, one->count);
	} else {
		bool node_failed = false;
		int i;

		/* Produce a clear message per failing node and count
		 * banning credits against it */
		for (i = 0; i < one->count; i++) {
			uint32_t pnn;

			if (err_list[i] == 0) {
				continue;
			}

			pnn = one->pnns[i];
			D_ERR("RELEASE_IP %s failed on node %u, ret=%d\n",
			      one->ip_str, pnn, err_list[i]);
			state->ban_credits[pnn]++;
			state->err_any = err_list[i];
			node_failed = true;
		}

		if (! node_failed) {
			/* Failure not attributable to any node */
			D_ERR("RELEASE_IP %s internal error, ret=%d\n",
			      one->ip_str, ret);
			state->err_any = EIO;
		}

		state->num_fails++;
	}

	talloc_free(one);

	state->num_replies++;
	if (state->num_replies < state->num_sent) {
		/* Still waiting for other addresses */
		return;
	}

	if (state->num_fails == 0) {
		tevent_req_done(req);
		return;
	}

	tevent_req_error(req, state->err_any);
}
/* Collect the result of release_ip_send().  Returns whatever
 * generic_recv() returns: true on success, otherwise false with the
 * error stored in *perr when perr is not NULL. */
static bool release_ip_recv(struct tevent_req *req, int *perr)
{
return generic_recv(req, perr);
}
/**********************************************************************/
/* Aggregate state for the TAKEOVER_IP stage, shared by all of the
 * per-address sub-requests */
struct take_ip_state {
/* Number of sub-requests sent, completed and failed so far */
int num_sent;
int num_replies;
int num_fails;
/* Error reported for the stage if any sub-request failed; the most
 * recently recorded error wins */
int err_any;
/* Per-node failure counters, indexed by PNN; supplied by the caller */
uint32_t *ban_credits;
};
/* State for the TAKEOVER_IP control sent for one IP address */
struct take_ip_one_state {
/* Parent request that this sub-request reports into */
struct tevent_req *req;
/* Node that the control was sent to */
uint32_t pnn;
/* Printable form of the address, used in log messages */
const char *ip_str;
};
/* Completion callback for one per-address TAKEOVER_IP control */
static void take_ip_done(struct tevent_req *subreq);
/*
 * Send TAKEOVER_IP for every assigned address in all_ips.
 *
 * Each control goes to the node that should host the address.
 * Addresses whose pnn is -1 are unassigned and are skipped.  Many of
 * the controls will be redundant because the allocation has not
 * changed, but they help to recover from inconsistencies.
 *
 * timeout is an absolute, cumulative deadline shared by all controls.
 * ban_credits[] is incremented (in take_ip_done()) for nodes that
 * fail.  The request completes once every sub-request has completed,
 * or immediately if nothing needed to be sent.
 */
static struct tevent_req *take_ip_send(TALLOC_CTX *mem_ctx,
				       struct tevent_context *ev,
				       struct ctdb_client_context *client,
				       struct timeval timeout,
				       struct public_ip_list *all_ips,
				       uint32_t *ban_credits)
{
	struct tevent_req *req, *subreq;
	struct take_ip_state *state;
	struct public_ip_list *cur;

	req = tevent_req_create(mem_ctx, &state, struct take_ip_state);
	if (req == NULL) {
		return NULL;
	}

	state->ban_credits = ban_credits;
	state->num_fails = 0;
	state->num_replies = 0;
	state->num_sent = 0;

	for (cur = all_ips; cur != NULL; cur = cur->next) {
		struct take_ip_one_state *one;
		struct ctdb_req_control request;
		struct ctdb_public_ip ip;

		if (cur->pnn == -1) {
			/* Address stays unassigned */
			continue;
		}

		one = talloc_zero(state, struct take_ip_one_state);
		if (tevent_req_nomem(one, req)) {
			return tevent_req_post(req, ev);
		}

		one->req = req;
		one->pnn = cur->pnn;

		one->ip_str = ctdb_sock_addr_to_string(one, &cur->addr);
		if (tevent_req_nomem(one->ip_str, req)) {
			return tevent_req_post(req, ev);
		}

		ip.pnn = cur->pnn;
		ip.addr = cur->addr;
		ctdb_req_control_takeover_ip(&request, &ip);

		subreq = ctdb_client_control_send(
				state, ev, client, cur->pnn,
				timeout, /* cumulative */
				&request);
		if (tevent_req_nomem(subreq, req)) {
			return tevent_req_post(req, ev);
		}
		tevent_req_set_callback(subreq, take_ip_done, one);

		state->num_sent++;
	}

	if (state->num_sent != 0) {
		return req;
	}

	/* Nothing was sent, so there is nothing to wait for */
	tevent_req_done(req);
	return tevent_req_post(req, ev);
}
/*
 * Completion callback for the TAKEOVER_IP control of one address.
 *
 * A failure to deliver the control and a failure reported in the
 * reply are both logged and both count a banning credit against the
 * target node.  Once the last sub-request has reported in, the parent
 * request completes (with state->err_any if any sub-request failed).
 */
static void take_ip_done(struct tevent_req *subreq)
{
	struct take_ip_one_state *one = tevent_req_callback_data(
		subreq, struct take_ip_one_state);
	struct tevent_req *req = one->req;
	struct take_ip_state *state = tevent_req_data(
		req, struct take_ip_state);
	struct ctdb_reply_control *reply;
	int ret = 0;
	bool ok;

	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
	TALLOC_FREE(subreq);

	if (! ok) {
		D_ERR("TAKEOVER_IP %s failed to node %u, ret=%d\n",
		      one->ip_str, one->pnn, ret);
	} else {
		ret = ctdb_reply_control_takeover_ip(reply);
		if (ret != 0) {
			D_ERR("TAKEOVER_IP %s failed on node %u, ret=%d\n",
			      one->ip_str, one->pnn, ret);
			ok = false;
		}
	}

	if (ok) {
		D_INFO("TAKEOVER_IP %s succeeded on node %u\n",
		       one->ip_str, one->pnn);
	} else {
		state->ban_credits[one->pnn]++;
		state->num_fails++;
		state->err_any = ret;
	}

	talloc_free(one);

	state->num_replies++;
	if (state->num_replies < state->num_sent) {
		/* Still waiting for other addresses */
		return;
	}

	if (state->num_fails == 0) {
		tevent_req_done(req);
		return;
	}

	tevent_req_error(req, state->err_any);
}
/* Collect the result of take_ip_send().  Returns whatever
 * generic_recv() returns: true on success, otherwise false with the
 * error stored in *perr when perr is not NULL. */
static bool take_ip_recv(struct tevent_req *req, int *perr)
{
return generic_recv(req, perr);
}
/**********************************************************************/
/* State for the IPREALLOCATED stage */
struct ipreallocated_state {
/* Nodes the control is sent to, and how many there are; the array is
 * supplied by the caller and only referenced here */
uint32_t *pnns;
int count;
/* Per-node failure counters, indexed by PNN; supplied by the caller */
uint32_t *ban_credits;
};
/* Completion callback for the IPREALLOCATED multi-control */
static void ipreallocated_done(struct tevent_req *subreq);
/*
 * Send IPREALLOCATED to the count nodes listed in pnns[].
 *
 * timeout is an absolute, cumulative deadline.  ban_credits[] is
 * incremented (in ipreallocated_done()) for nodes that fail.
 *
 * Returns the new request, or NULL if it could not be created.
 */
static struct tevent_req *ipreallocated_send(TALLOC_CTX *mem_ctx,
					     struct tevent_context *ev,
					     struct ctdb_client_context *client,
					     uint32_t *pnns,
					     int count,
					     struct timeval timeout,
					     uint32_t *ban_credits)
{
	struct ipreallocated_state *state;
	struct ctdb_req_control request;
	struct tevent_req *req;
	struct tevent_req *subreq;

	req = tevent_req_create(mem_ctx, &state, struct ipreallocated_state);
	if (req == NULL) {
		return NULL;
	}

	state->ban_credits = ban_credits;
	state->count = count;
	state->pnns = pnns;

	ctdb_req_control_ipreallocated(&request);
	subreq = ctdb_client_control_multi_send(state, ev, client,
						pnns, count,
						timeout, /* cumulative */
						&request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}

	tevent_req_set_callback(subreq, ipreallocated_done, req);
	return req;
}
/*
 * Completion callback for the IPREALLOCATED multi-control.
 *
 * On failure each node with a non-zero entry in the error list is
 * logged and given a banning credit.  If no node can be blamed, an
 * internal error is logged instead.  Either way the request fails
 * with the error returned by the multi-control.
 */
static void ipreallocated_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct ipreallocated_state *state = tevent_req_data(
		req, struct ipreallocated_state);
	int *err_list = NULL;
	int ret, i;
	bool ok;
	bool node_failed = false;

	ok = ctdb_client_control_multi_recv(subreq, &ret, state,
					    &err_list, NULL);
	TALLOC_FREE(subreq);

	if (ok) {
		D_INFO("IPREALLOCATED succeeded on %d nodes\n", state->count);
		tevent_req_done(req);
		return;
	}

	/* Produce a clear message per failing node and count banning
	 * credits against it */
	for (i = 0; i < state->count; i++) {
		uint32_t pnn;

		if (err_list[i] == 0) {
			continue;
		}

		pnn = state->pnns[i];
		D_ERR("IPREALLOCATED failed on node %u, ret=%d\n",
		      pnn, err_list[i]);
		state->ban_credits[pnn]++;
		node_failed = true;
	}

	if (! node_failed) {
		D_ERR("IPREALLOCATED internal error, ret=%d\n", ret);
	}

	tevent_req_error(req, ret);
}
/* Collect the result of ipreallocated_send().  Returns whatever
 * generic_recv() returns: true on success, otherwise false with the
 * error stored in *perr when perr is not NULL. */
static bool ipreallocated_recv(struct tevent_req *req, int *perr)
{
return generic_recv(req, perr);
}
/**********************************************************************/
/*
* Recalculate the allocation of public IPs to nodes and have the
* nodes host their allocated addresses.
*
* - Get tunables
* - Get nodemap
* - Initialise IP allocation state. Pass:
* + algorithm to be used;
* + various tunables (NoIPTakeover, NoIPFailback, NoIPHostOnAllDisabled)
* + list of nodes to force rebalance (internal structure, currently
* no way to fetch, only used by LCP2 for nodes that have had new
* IP addresses added).
* - Set IP flags for IP allocation based on node map
* - Retrieve known and available IP addresses (done separately so
* values can be faked in unit testing)
* - Use ipalloc_set_public_ips() to set known and available IP
* addresses for allocation
* - If cluster can't host IP addresses then jump to IPREALLOCATED
* - Run IP allocation algorithm
* - Send RELEASE_IP to all nodes for IPs they should not host
* - Send TAKEOVER_IP to all nodes for IPs they should host
* - Send IPREALLOCATED to all nodes
*/
/* State for a complete takeover run */
struct takeover_state {
/* Event context and client connection used for every stage */
struct tevent_context *ev;
struct ctdb_client_context *client;
/* Cumulative deadline shared by the RELEASE_IP, TAKEOVER_IP and
 * IPREALLOCATED stages */
struct timeval timeout;
/* Number of entries in the node map */
int num_nodes;
/* PNNs and count of connected nodes, from list_of_connected_nodes() */
uint32_t *pnns_connected;
int num_connected;
/* PNNs and count of active nodes, from list_of_active_nodes() */
uint32_t *pnns_active;
int num_active;
/* Node that GET_ALL_TUNABLES and GET_NODEMAP are sent to; set from
 * ctdb_client_pnn() */
uint32_t destnode;
/* Optional list of nodes to force rebalance, passed to
 * ipalloc_state_init(); may be NULL */
uint32_t *force_rebalance_nodes;
/* Tunables fetched from destnode */
struct ctdb_tunable_list *tun_list;
/* IP allocation state; not initialised when IP failover is disabled */
struct ipalloc_state *ipalloc_state;
/* Known public IPs per node, indexed by PNN */
struct ctdb_public_ip_list *known_ips;
/* Result of ipalloc(): the address list with node assignments */
struct public_ip_list *all_ips;
/* Per-node failure counters, indexed by PNN, num_nodes entries */
uint32_t *ban_credits;
};
/* Stage callbacks, listed in the order in which the stages run */
static void takeover_tunables_done(struct tevent_req *subreq);
static void takeover_nodemap_done(struct tevent_req *subreq);
static void takeover_known_ips_done(struct tevent_req *subreq);
static void takeover_avail_ips_done(struct tevent_req *subreq);
static void takeover_release_ip_done(struct tevent_req *subreq);
static void takeover_take_ip_done(struct tevent_req *subreq);
/* Starts the IPREALLOCATED stage; takes the top-level request */
static void takeover_ipreallocated(struct tevent_req *req);
static void takeover_ipreallocated_done(struct tevent_req *subreq);
/* Failure path.  Despite the parameter name used here, every caller
 * passes the top-level request, and the definition names it req. */
static void takeover_failed(struct tevent_req *subreq, int ret);
static void takeover_failed_done(struct tevent_req *subreq);
/*
 * Start a takeover run.
 *
 * Records the event context, the client and the optional list of
 * nodes to force rebalance, then starts the first stage by sending
 * GET_ALL_TUNABLES to the node reported by ctdb_client_pnn().
 *
 * Returns the new request, or NULL if it could not be created.
 */
static struct tevent_req *takeover_send(TALLOC_CTX *mem_ctx,
					struct tevent_context *ev,
					struct ctdb_client_context *client,
					uint32_t *force_rebalance_nodes)
{
	struct takeover_state *state;
	struct ctdb_req_control request;
	struct tevent_req *req;
	struct tevent_req *subreq;

	req = tevent_req_create(mem_ctx, &state, struct takeover_state);
	if (req == NULL) {
		return NULL;
	}

	state->client = client;
	state->ev = ev;
	state->destnode = ctdb_client_pnn(client);
	state->force_rebalance_nodes = force_rebalance_nodes;

	ctdb_req_control_get_all_tunables(&request);
	subreq = ctdb_client_control_send(state, ev, client,
					  state->destnode, TIMEOUT(),
					  &request);
	if (tevent_req_nomem(subreq, req)) {
		return tevent_req_post(req, ev);
	}

	tevent_req_set_callback(subreq, takeover_tunables_done, req);
	return req;
}
/*
 * GET_ALL_TUNABLES completed.
 *
 * Stores the tunables in state->tun_list, updates the file-level
 * takeover_timeout from them, and moves on to fetching the node map.
 */
static void takeover_tunables_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct ctdb_reply_control *reply;
	struct ctdb_req_control request;
	int ret;
	bool ok;

	ok = ctdb_client_control_recv(subreq, &ret, state, &reply);
	TALLOC_FREE(subreq);

	if (ok) {
		/* Delivered: now decode the reply */
		ret = ctdb_reply_control_get_all_tunables(reply, state,
							  &state->tun_list);
	}

	if (! ok || ret != 0) {
		/* Delivery and decode failures are reported alike */
		D_ERR("control GET_ALL_TUNABLES failed, ret=%d\n", ret);
		tevent_req_error(req, ret);
		return;
	}

	talloc_free(reply);

	/* Every later TIMEOUT() uses the configured value */
	takeover_timeout = state->tun_list->takeover_timeout;

	ctdb_req_control_get_nodemap(&request);
	subreq = ctdb_client_control_send(state, state->ev, state->client,
					  state->destnode, TIMEOUT(),
					  &request);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_nodemap_done, req);
}
static void takeover_nodemap_done(struct tevent_req *subreq)
{
struct tevent_req *req = tevent_req_callback_data(
subreq, struct tevent_req);
struct takeover_state *state = tevent_req_data(
req, struct takeover_state);
struct ctdb_reply_control *reply;
bool status;
int ret;
struct ctdb_node_map *nodemap;
status = ctdb_client_control_recv(subreq, &ret, state, &reply);
TALLOC_FREE(subreq);
if (! status) {
D_ERR("control GET_NODEMAP failed to node %u, ret=%d\n",
state->destnode, ret);
tevent_req_error(req, ret);
return;
}
ret = ctdb_reply_control_get_nodemap(reply, state, &nodemap);
if (ret != 0) {
D_ERR("control GET_NODEMAP failed, ret=%d\n", ret);
tevent_req_error(req, ret);
return;
}
state->num_nodes = nodemap->num;
state->num_connected = list_of_connected_nodes(nodemap,
CTDB_UNKNOWN_PNN, state,
&state->pnns_connected);
if (state->num_connected <= 0) {
tevent_req_error(req, ENOMEM);
return;
}
state->num_active = list_of_active_nodes(nodemap,
CTDB_UNKNOWN_PNN, state,
&state->pnns_active);
if (state->num_active <= 0) {
tevent_req_error(req, ENOMEM);
return;
}
/* Default timeout for early jump to IPREALLOCATED. See below
* for explanation of 3 times...
*/
state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);
state->ban_credits = talloc_zero_array(state, uint32_t,
state->num_nodes);
if (tevent_req_nomem(state->ban_credits, req)) {
return;
}
if (state->tun_list->disable_ip_failover != 0) {
/* IP failover is completely disabled so just send out
* ipreallocated event.
*/
takeover_ipreallocated(req);
return;
}
state->ipalloc_state =
ipalloc_state_init(
state, state->num_nodes,
determine_algorithm(state->tun_list),
(state->tun_list->no_ip_takeover != 0),
(state->tun_list->no_ip_failback != 0),
(state->tun_list->no_ip_host_on_all_disabled != 0),
state->force_rebalance_nodes);
if (tevent_req_nomem(state->ipalloc_state, req)) {
return;
}
ipalloc_set_node_flags(state->ipalloc_state, nodemap);
subreq = get_public_ips_send(state, state->ev, state->client,
state->pnns_connected, state->num_connected,
state->num_nodes, state->ban_credits,
false);
if (tevent_req_nomem(subreq, req)) {
return;
}
tevent_req_set_callback(subreq, takeover_known_ips_done, req);
}
/*
 * Known public IPs have been fetched.
 *
 * Builds the list of active nodes that actually have known IPs and
 * asks just those nodes for their available IPs.
 */
static void takeover_known_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	uint32_t *query_pnns = NULL;
	int num_query = 0;
	int ret, i;
	bool ok;

	ok = get_public_ips_recv(subreq, &ret, state, &state->known_ips);
	TALLOC_FREE(subreq);
	if (! ok) {
		D_ERR("Failed to fetch known public IPs\n");
		takeover_failed(req, ret);
		return;
	}

	/* Sized for the worst case: every active node has known IPs */
	query_pnns = talloc_zero_array(state, uint32_t, state->num_active);
	if (tevent_req_nomem(query_pnns, req)) {
		return;
	}

	for (i = 0; i < state->num_active; i++) {
		uint32_t pnn = state->pnns_active[i];

		/* Only nodes with known IPs are worth asking */
		if (state->known_ips[pnn].num > 0) {
			query_pnns[num_query] = pnn;
			num_query++;
		}
	}

	/* Available IPs (available_only == true) */
	subreq = get_public_ips_send(state, state->ev, state->client,
				     query_pnns, num_query,
				     state->num_nodes, state->ban_credits,
				     true);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_avail_ips_done, req);
}
/*
 * Available public IPs have been fetched.
 *
 * Feeds the known and available IPs into the allocation state.  If
 * the cluster cannot host IPs this jumps to IPREALLOCATED.  Otherwise
 * it runs the allocation algorithm and starts the RELEASE_IP stage.
 */
static void takeover_avail_ips_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct ctdb_public_ip_list *avail_ips;
	int ret;
	bool ok;

	ok = get_public_ips_recv(subreq, &ret, state, &avail_ips);
	TALLOC_FREE(subreq);
	if (! ok) {
		D_ERR("Failed to fetch available public IPs\n");
		takeover_failed(req, ret);
		return;
	}

	ipalloc_set_public_ips(state->ipalloc_state,
			       state->known_ips, avail_ips);

	if (! ipalloc_can_host_ips(state->ipalloc_state)) {
		D_NOTICE("No nodes available to host public IPs yet\n");
		takeover_ipreallocated(req);
		return;
	}

	/* Run the IP reassignment calculation */
	state->all_ips = ipalloc(state->ipalloc_state);
	if (tevent_req_nomem(state->all_ips, req)) {
		return;
	}

	/* RELEASE_IP, TAKEOVER_IP and IPREALLOCATED each notionally
	 * have TakeoverTimeout seconds.  RELEASE_IP can need longer
	 * because of TCP connection killing, so a single cumulative
	 * timeout of 3 * TakeoverTimeout seconds covers all 3 stages.
	 * No expiry check is needed before each stage: tevent fires
	 * timeouts even when they are already in the past.  The
	 * deadline is set here so that it covers exactly these stages
	 * and, in particular, not the time spent in ipalloc().
	 */
	state->timeout = timeval_current_ofs(3 * takeover_timeout, 0);

	subreq = release_ip_send(state, state->ev, state->client,
				 state->pnns_connected,
				 state->num_connected,
				 state->timeout, state->all_ips,
				 state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_release_ip_done, req);
}
/*
 * RELEASE_IP stage completed.
 *
 * On failure this enters the failure path.  Otherwise it starts the
 * TAKEOVER_IP stage with the same cumulative timeout.
 */
static void takeover_release_ip_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	int err;
	bool released;

	released = release_ip_recv(subreq, &err);
	TALLOC_FREE(subreq);
	if (! released) {
		takeover_failed(req, err);
		return;
	}

	/* Everything is released, so takeovers can go ahead */
	subreq = take_ip_send(state, state->ev, state->client,
			      state->timeout, state->all_ips,
			      state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_take_ip_done, req);
}
/*
 * TAKEOVER_IP stage completed.
 *
 * On failure this enters the failure path.  Otherwise it moves on to
 * the IPREALLOCATED stage.
 */
static void takeover_take_ip_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	int err = 0;
	bool taken;

	taken = take_ip_recv(subreq, &err);
	TALLOC_FREE(subreq);

	if (taken) {
		takeover_ipreallocated(req);
		return;
	}

	takeover_failed(req, err);
}
/*
 * Start the IPREALLOCATED stage: send the control to all connected
 * nodes using the cumulative timeout stored in the takeover state.
 */
static void takeover_ipreallocated(struct tevent_req *req)
{
	struct tevent_req *subreq;
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);

	subreq = ipreallocated_send(state, state->ev, state->client,
				    state->pnns_connected,
				    state->num_connected,
				    state->timeout,
				    state->ban_credits);
	if (tevent_req_nomem(subreq, req)) {
		return;
	}

	tevent_req_set_callback(subreq, takeover_ipreallocated_done, req);
}
/*
 * IPREALLOCATED stage completed.
 *
 * On failure this enters the failure path.  Otherwise the whole
 * takeover run is finished.
 */
static void takeover_ipreallocated_done(struct tevent_req *subreq)
{
	struct tevent_req *req = tevent_req_callback_data(
		subreq, struct tevent_req);
	int err;
	bool ok;

	ok = ipreallocated_recv(subreq, &err);
	TALLOC_FREE(subreq);

	if (ok) {
		tevent_req_done(req);
		return;
	}

	takeover_failed(req, err);
}
/* State carried across the banning message sent by takeover_failed() */
struct takeover_failed_state {
/* Top-level takeover request to fail once the message has been sent */
struct tevent_req *req;
/* Original error that the top-level request will be failed with */
int ret;
};
/*
 * Failure path for the takeover run.
 *
 * req is the top-level takeover request and ret is the error to fail
 * it with.  If bans are enabled and at least one node has collected
 * banning credits, the node with the most credits (the lowest PNN on
 * a tie) is reported via a CTDB_SRVID_BANNING message before the
 * request is failed.  In every other case the request is failed
 * straight away.
 *
 * Defined static to match its forward declaration.
 */
static void takeover_failed(struct tevent_req *req, int ret)
{
	struct takeover_state *state = tevent_req_data(
		req, struct takeover_state);
	struct tevent_req *subreq;
	struct takeover_failed_state *substate;
	struct ctdb_req_message message;
	uint32_t max_pnn = CTDB_UNKNOWN_PNN;
	/* Same type as the counters, so the comparison below does not
	 * mix signed and unsigned operands */
	uint32_t max_credits = 0;
	int pnn;

	/* Check that bans are enabled */
	if (state->tun_list->enable_bans == 0) {
		tevent_req_error(req, ret);
		return;
	}

	for (pnn = 0; pnn < state->num_nodes; pnn++) {
		if (state->ban_credits[pnn] > max_credits) {
			max_pnn = pnn;
			max_credits = state->ban_credits[pnn];
		}
	}

	if (max_credits == 0) {
		/* Nobody to blame */
		tevent_req_error(req, ret);
		return;
	}

	D_WARNING("Assigning banning credits to node %u\n", max_pnn);

	substate = talloc_zero(state, struct takeover_failed_state);
	if (tevent_req_nomem(substate, req)) {
		return;
	}
	substate->req = req;
	substate->ret = ret;

	message.srvid = CTDB_SRVID_BANNING;
	message.data.pnn = max_pnn;

	subreq = ctdb_client_message_send(
			state, state->ev, state->client,
			ctdb_client_pnn(state->client),
			&message);
	if (subreq == NULL) {
		D_ERR("failed to assign banning credits\n");
		tevent_req_error(req, ret);
		return;
	}
	tevent_req_set_callback(subreq, takeover_failed_done, substate);
}
/*
 * The banning message has been sent (or has failed to send).
 *
 * A send failure is only logged.  Either way the top-level request is
 * failed with the original error saved by takeover_failed().
 */
static void takeover_failed_done(struct tevent_req *subreq)
{
	struct takeover_failed_state *substate = tevent_req_callback_data(
		subreq, struct takeover_failed_state);
	struct tevent_req *req = substate->req;
	int orig_err = substate->ret;
	int msg_err;
	bool sent;

	sent = ctdb_client_message_recv(subreq, &msg_err);
	TALLOC_FREE(subreq);
	if (! sent) {
		D_ERR("failed to assign banning credits, ret=%d\n", msg_err);
	}

	talloc_free(substate);
	tevent_req_error(req, orig_err);
}
/*
 * Collect the result of takeover_send().
 *
 * When perr is not NULL, *perr is always written: the unix error on
 * failure and 0 on success.  generic_recv() leaves *perr untouched on
 * success, so without the explicit 0 a caller would have to
 * pre-initialise its variable to tell the two outcomes apart.
 */
static void takeover_recv(struct tevent_req *req, int *perr)
{
	if (generic_recv(req, perr) && perr != NULL) {
		*perr = 0;
	}
}
/*
 * Parse a comma-separated list of node numbers.
 *
 * Returns a talloc array on mem_ctx with one entry per list element,
 * or NULL on allocation failure or if any element is not a plain
 * decimal number that fits in a uint32_t.  Earlier code used atoi(),
 * which silently turned malformed entries into node 0 and wrapped
 * out-of-range values.
 *
 * Intermediate allocations are left on mem_ctx and are released when
 * the caller frees that context.
 */
static uint32_t *parse_node_list(TALLOC_CTX *mem_ctx, const char* s)
{
	char *strv = NULL;
	char *t = NULL;
	uint32_t *nodes;
	int num, i, ret;

	ret = strv_split(mem_ctx, &strv, s, ",");
	if (ret != 0) {
		D_ERR("out of memory\n");
		return NULL;
	}

	num = strv_count(strv);

	nodes = talloc_array(mem_ctx, uint32_t, num);
	if (nodes == NULL) {
		D_ERR("out of memory\n");
		return NULL;
	}

	for (i = 0; i < num; i++) {
		unsigned long val;
		char *end = NULL;

		t = strv_next(strv, t);

		/* Require a leading digit: strtoul() itself would
		 * accept leading whitespace and a sign */
		if (t == NULL || t[0] < '0' || t[0] > '9') {
			D_ERR("Invalid node in node list: %s\n",
			      t != NULL ? t : "");
			return NULL;
		}

		errno = 0;
		val = strtoul(t, &end, 10);
		if (errno != 0 || *end != '\0' || val > UINT32_MAX) {
			D_ERR("Invalid node in node list: %s\n", t);
			return NULL;
		}

		nodes[i] = (uint32_t)val;
	}

	return nodes;
}
/* Print a one-line usage summary for progname to stderr */
static void usage(const char *progname)
{
	fprintf(stderr,
		"\nUsage: %s <output-fd> <ctdb-socket-path> [<force-rebalance-nodes>]\n",
		progname);
}
/*
* Arguments - write fd, socket path and, optionally, a comma-separated
* list of nodes to force rebalance
*/
/*
 * Entry point.
 *
 * argv[1] is the file descriptor that the result is written to,
 * argv[2] is the ctdb socket path and the optional argv[3] is a
 * comma-separated list of nodes to force rebalance.
 *
 * Once the arguments have been validated, the final status (0 on
 * success) is written to the output fd as an int and also used as
 * the exit status.  Invalid arguments print the usage message and
 * exit with status 1 without writing anything.
 */
int main(int argc, const char *argv[])
{
	int write_fd;
	const char *sockpath;
	TALLOC_CTX *mem_ctx;
	struct tevent_context *ev;
	struct ctdb_client_context *client;
	int ret;
	struct tevent_req *req;
	uint32_t *force_rebalance_nodes = NULL;
	char *endptr = NULL;
	long fd_val;

	if (argc < 3 || argc > 4) {
		usage(argv[0]);
		exit(1);
	}

	/* Validate the output fd.  atoi() would turn garbage into 0
	 * and the result would then be written to fd 0.  The round
	 * trip through int rejects values that do not fit. */
	errno = 0;
	fd_val = strtol(argv[1], &endptr, 10);
	write_fd = (int)fd_val;
	if (errno != 0 || endptr == argv[1] || *endptr != '\0' ||
	    fd_val < 0 || (long)write_fd != fd_val) {
		usage(argv[0]);
		exit(1);
	}

	sockpath = argv[2];

	mem_ctx = talloc_new(NULL);
	if (mem_ctx == NULL) {
		fprintf(stderr, "talloc_new() failed\n");
		ret = ENOMEM;
		goto done;
	}

	if (argc == 4) {
		force_rebalance_nodes = parse_node_list(mem_ctx, argv[3]);
		if (force_rebalance_nodes == NULL) {
			usage(argv[0]);
			ret = EINVAL;
			goto done;
		}
	}

	ret = logging_init(mem_ctx, NULL, NULL, "ctdb-takeover");
	if (ret != 0) {
		fprintf(stderr,
			"ctdb-takeover: Unable to initialize logging\n");
		goto done;
	}

	ev = tevent_context_init(mem_ctx);
	if (ev == NULL) {
		D_ERR("tevent_context_init() failed\n");
		ret = ENOMEM;
		goto done;
	}

	ret = ctdb_client_init(mem_ctx, ev, sockpath, &client);
	if (ret != 0) {
		D_ERR("ctdb_client_init() failed, ret=%d\n", ret);
		goto done;
	}

	req = takeover_send(mem_ctx, ev, client, force_rebalance_nodes);
	if (req == NULL) {
		D_ERR("takeover_send() failed\n");
		ret = 1;
		goto done;
	}

	if (! tevent_req_poll(req, ev)) {
		D_ERR("tevent_req_poll() failed\n");
		ret = 1;
		goto done;
	}

	/* Start from success explicitly instead of relying on the
	 * value left behind by ctdb_client_init() above */
	ret = 0;
	takeover_recv(req, &ret);
	TALLOC_FREE(req);
	if (ret != 0) {
		D_ERR("takeover run failed, ret=%d\n", ret);
	}

done:
	sys_write_v(write_fd, &ret, sizeof(ret));

	talloc_free(mem_ctx);
	return ret;
}