mirror of
https://github.com/samba-team/samba.git
synced 2024-12-25 23:21:54 +03:00
d8398b04b5
Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
3491 lines
89 KiB
C
3491 lines
89 KiB
C
/*
|
|
ctdb ip takeover code
|
|
|
|
Copyright (C) Ronnie Sahlberg 2007
|
|
Copyright (C) Andrew Tridgell 2007
|
|
Copyright (C) Martin Schwenke 2011
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include "replace.h"
|
|
#include "system/network.h"
|
|
#include "system/filesys.h"
|
|
#include "system/time.h"
|
|
#include "system/wait.h"
|
|
|
|
#include <talloc.h>
|
|
#include <tevent.h>
|
|
|
|
#include "lib/util/dlinklist.h"
|
|
#include "lib/util/debug.h"
|
|
#include "lib/util/samba_util.h"
|
|
#include "lib/util/util_process.h"
|
|
|
|
#include "ctdb_private.h"
|
|
#include "ctdb_client.h"
|
|
|
|
#include "common/rb_tree.h"
|
|
#include "common/reqid.h"
|
|
#include "common/system.h"
|
|
#include "common/common.h"
|
|
#include "common/logging.h"
|
|
|
|
#include "server/ipalloc.h"
|
|
|
|
/* Timeout built from the takeover_timeout tunable.  Expands to an
 * expression that reads a variable named "ctdb" from the scope in
 * which the macro is used. */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* First argument given to timeval_current_ofs() when the ARP resend
 * timer is re-armed (presumably seconds - confirm against
 * timeval_current_ofs()). */
#define CTDB_ARP_INTERVAL 1
/* Number of announcement rounds after which resending stops. */
#define CTDB_ARP_REPEAT 3
|
|
|
|
/* One local network interface that public addresses may be assigned
 * to.  Entries are linked into the ctdb->ifaces list. */
struct ctdb_interface {
	struct ctdb_interface *prev;
	struct ctdb_interface *next;
	/* interface name */
	const char *name;
	/* set to true when the entry is created; only interfaces with
	 * link_up set are considered when choosing an interface */
	bool link_up;
	/* number of VNNs currently assigned to this interface */
	uint32_t references;
};
|
|
|
|
static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
|
|
{
|
|
if (vnn->iface) {
|
|
return vnn->iface->name;
|
|
}
|
|
|
|
return "__none__";
|
|
}
|
|
|
|
static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
|
|
{
|
|
struct ctdb_interface *i;
|
|
|
|
if (strlen(iface) > CTDB_IFACE_SIZE) {
|
|
DEBUG(DEBUG_ERR, ("Interface name too long \"%s\"\n", iface));
|
|
return -1;
|
|
}
|
|
|
|
/* Verify that we don't have an entry for this ip yet */
|
|
for (i=ctdb->ifaces;i;i=i->next) {
|
|
if (strcmp(i->name, iface) == 0) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* create a new structure for this interface */
|
|
i = talloc_zero(ctdb, struct ctdb_interface);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, i);
|
|
i->name = talloc_strdup(i, iface);
|
|
CTDB_NO_MEMORY(ctdb, i->name);
|
|
|
|
i->link_up = true;
|
|
|
|
DLIST_ADD(ctdb->ifaces, i);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool vnn_has_interface_with_name(struct ctdb_vnn *vnn,
|
|
const char *name)
|
|
{
|
|
int n;
|
|
|
|
for (n = 0; vnn->ifaces[n] != NULL; n++) {
|
|
if (strcmp(name, vnn->ifaces[n]) == 0) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* If any interfaces now have no possible IPs then delete them. This
|
|
* implementation is naive (i.e. simple) rather than clever
|
|
* (i.e. complex). Given that this is run on delip and that operation
|
|
* is rare, this doesn't need to be efficient - it needs to be
|
|
* foolproof. One alternative is reference counting, where the logic
|
|
* is distributed and can, therefore, be broken in multiple places.
|
|
* Another alternative is to build a red-black tree of interfaces that
|
|
* can have addresses (by walking ctdb->vnn and ctdb->single_ip_vnn
|
|
* once) and then walking ctdb->ifaces once and deleting those not in
|
|
* the tree. Let's go to one of those if the naive implementation
|
|
* causes problems... :-)
|
|
*/
|
|
static void ctdb_remove_orphaned_ifaces(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
struct ctdb_interface *i, *next;
|
|
|
|
/* For each interface, check if there's an IP using it. */
|
|
for (i = ctdb->ifaces; i != NULL; i = next) {
|
|
struct ctdb_vnn *tv;
|
|
bool found;
|
|
next = i->next;
|
|
|
|
/* Only consider interfaces named in the given VNN. */
|
|
if (!vnn_has_interface_with_name(vnn, i->name)) {
|
|
continue;
|
|
}
|
|
|
|
/* Is the "single IP" on this interface? */
|
|
if ((ctdb->single_ip_vnn != NULL) &&
|
|
(ctdb->single_ip_vnn->ifaces[0] != NULL) &&
|
|
(strcmp(i->name, ctdb->single_ip_vnn->ifaces[0]) == 0)) {
|
|
/* Found, next interface please... */
|
|
continue;
|
|
}
|
|
/* Search for a vnn with this interface. */
|
|
found = false;
|
|
for (tv=ctdb->vnn; tv; tv=tv->next) {
|
|
if (vnn_has_interface_with_name(tv, i->name)) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found) {
|
|
/* None of the VNNs are using this interface. */
|
|
DLIST_REMOVE(ctdb->ifaces, i);
|
|
talloc_free(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static struct ctdb_interface *ctdb_find_iface(struct ctdb_context *ctdb,
|
|
const char *iface)
|
|
{
|
|
struct ctdb_interface *i;
|
|
|
|
for (i=ctdb->ifaces;i;i=i->next) {
|
|
if (strcmp(i->name, iface) == 0) {
|
|
return i;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct ctdb_interface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
int i;
|
|
struct ctdb_interface *cur = NULL;
|
|
struct ctdb_interface *best = NULL;
|
|
|
|
for (i=0; vnn->ifaces[i]; i++) {
|
|
|
|
cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
|
|
if (cur == NULL) {
|
|
continue;
|
|
}
|
|
|
|
if (!cur->link_up) {
|
|
continue;
|
|
}
|
|
|
|
if (best == NULL) {
|
|
best = cur;
|
|
continue;
|
|
}
|
|
|
|
if (cur->references < best->references) {
|
|
best = cur;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return best;
|
|
}
|
|
|
|
static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
struct ctdb_interface *best = NULL;
|
|
|
|
if (vnn->iface) {
|
|
DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
|
|
"still assigned to iface '%s'\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn)));
|
|
return 0;
|
|
}
|
|
|
|
best = ctdb_vnn_best_iface(ctdb, vnn);
|
|
if (best == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
|
|
"cannot assign to iface any iface\n",
|
|
ctdb_addr_to_str(&vnn->public_address)));
|
|
return -1;
|
|
}
|
|
|
|
vnn->iface = best;
|
|
best->references++;
|
|
vnn->pnn = ctdb->pnn;
|
|
|
|
DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
|
|
"now assigned to iface '%s' refs[%d]\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn),
|
|
best->references));
|
|
return 0;
|
|
}
|
|
|
|
static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
|
|
"now unassigned (old iface '%s' refs[%d])\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn),
|
|
vnn->iface?vnn->iface->references:0));
|
|
if (vnn->iface) {
|
|
vnn->iface->references--;
|
|
}
|
|
vnn->iface = NULL;
|
|
if (vnn->pnn == ctdb->pnn) {
|
|
vnn->pnn = -1;
|
|
}
|
|
}
|
|
|
|
static bool ctdb_vnn_available(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
int i;
|
|
|
|
/* Nodes that are not RUNNING can not host IPs */
|
|
if (ctdb->runstate != CTDB_RUNSTATE_RUNNING) {
|
|
return false;
|
|
}
|
|
|
|
if (vnn->delete_pending) {
|
|
return false;
|
|
}
|
|
|
|
if (vnn->iface && vnn->iface->link_up) {
|
|
return true;
|
|
}
|
|
|
|
for (i=0; vnn->ifaces[i]; i++) {
|
|
struct ctdb_interface *cur;
|
|
|
|
cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
|
|
if (cur == NULL) {
|
|
continue;
|
|
}
|
|
|
|
if (cur->link_up) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
struct ctdb_takeover_arp {
|
|
struct ctdb_context *ctdb;
|
|
uint32_t count;
|
|
ctdb_sock_addr addr;
|
|
struct ctdb_tcp_array *tcparray;
|
|
struct ctdb_vnn *vnn;
|
|
};
|
|
|
|
|
|
/*
|
|
lists of tcp endpoints
|
|
*/
|
|
struct ctdb_tcp_list {
|
|
struct ctdb_tcp_list *prev, *next;
|
|
struct ctdb_connection connection;
|
|
};
|
|
|
|
/*
|
|
list of clients to kill on IP release
|
|
*/
|
|
struct ctdb_client_ip {
|
|
struct ctdb_client_ip *prev, *next;
|
|
struct ctdb_context *ctdb;
|
|
ctdb_sock_addr addr;
|
|
uint32_t client_id;
|
|
};
|
|
|
|
|
|
/*
|
|
send a gratuitous arp
|
|
*/
|
|
static void ctdb_control_send_arp(struct tevent_context *ev,
|
|
struct tevent_timer *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
|
|
struct ctdb_takeover_arp);
|
|
int i, ret;
|
|
struct ctdb_tcp_array *tcparray;
|
|
const char *iface = ctdb_vnn_iface_string(arp->vnn);
|
|
|
|
ret = ctdb_sys_send_arp(&arp->addr, iface);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
|
|
iface, strerror(errno)));
|
|
}
|
|
|
|
tcparray = arp->tcparray;
|
|
if (tcparray) {
|
|
for (i=0;i<tcparray->num;i++) {
|
|
struct ctdb_connection *tcon;
|
|
|
|
tcon = &tcparray->connections[i];
|
|
DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
|
|
(unsigned)ntohs(tcon->dst.ip.sin_port),
|
|
ctdb_addr_to_str(&tcon->src),
|
|
(unsigned)ntohs(tcon->src.ip.sin_port)));
|
|
ret = ctdb_sys_send_tcp(
|
|
&tcon->src,
|
|
&tcon->dst,
|
|
0, 0, 0);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
|
|
ctdb_addr_to_str(&tcon->src)));
|
|
}
|
|
}
|
|
}
|
|
|
|
arp->count++;
|
|
|
|
if (arp->count == CTDB_ARP_REPEAT) {
|
|
talloc_free(arp);
|
|
return;
|
|
}
|
|
|
|
tevent_add_timer(arp->ctdb->ev, arp->vnn->takeover_ctx,
|
|
timeval_current_ofs(CTDB_ARP_INTERVAL, 100000),
|
|
ctdb_control_send_arp, arp);
|
|
}
|
|
|
|
static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
struct ctdb_takeover_arp *arp;
|
|
struct ctdb_tcp_array *tcparray;
|
|
|
|
if (!vnn->takeover_ctx) {
|
|
vnn->takeover_ctx = talloc_new(vnn);
|
|
if (!vnn->takeover_ctx) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
|
|
if (!arp) {
|
|
return -1;
|
|
}
|
|
|
|
arp->ctdb = ctdb;
|
|
arp->addr = vnn->public_address;
|
|
arp->vnn = vnn;
|
|
|
|
tcparray = vnn->tcp_array;
|
|
if (tcparray) {
|
|
/* add all of the known tcp connections for this IP to the
|
|
list of tcp connections to send tickle acks for */
|
|
arp->tcparray = talloc_steal(arp, tcparray);
|
|
|
|
vnn->tcp_array = NULL;
|
|
vnn->tcp_update_needed = true;
|
|
}
|
|
|
|
tevent_add_timer(arp->ctdb->ev, vnn->takeover_ctx,
|
|
timeval_zero(), ctdb_control_send_arp, arp);
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct takeover_callback_state {
|
|
struct ctdb_req_control_old *c;
|
|
ctdb_sock_addr *addr;
|
|
struct ctdb_vnn *vnn;
|
|
};
|
|
|
|
/* State handed to ctdb_do_takeip_callback() for one take-IP request. */
struct ctdb_do_takeip_state {
	/* the control to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* the VNN being taken over */
	struct ctdb_vnn *vnn;
};
|
|
|
|
/*
|
|
called when takeip event finishes
|
|
*/
|
|
static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
|
|
void *private_data)
|
|
{
|
|
struct ctdb_do_takeip_state *state =
|
|
talloc_get_type(private_data, struct ctdb_do_takeip_state);
|
|
int32_t ret;
|
|
TDB_DATA data;
|
|
|
|
if (status != 0) {
|
|
struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
|
|
|
|
if (status == -ETIME) {
|
|
ctdb_ban_self(ctdb);
|
|
}
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
|
|
ctdb_addr_to_str(&state->vnn->public_address),
|
|
ctdb_vnn_iface_string(state->vnn)));
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
|
|
|
node->flags |= NODE_FLAGS_UNHEALTHY;
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
if (ctdb->do_checkpublicip) {
|
|
|
|
ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
|
|
if (ret != 0) {
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
data.dptr = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
|
|
data.dsize = strlen((char *)data.dptr) + 1;
|
|
DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
|
|
|
|
ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
|
|
|
|
|
|
/* the control succeeded */
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
static int ctdb_takeip_destructor(struct ctdb_do_takeip_state *state)
|
|
{
|
|
state->vnn->update_in_flight = false;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
take over an ip address
|
|
*/
|
|
static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
int ret;
|
|
struct ctdb_do_takeip_state *state;
|
|
|
|
if (vnn->update_in_flight) {
|
|
DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u rejected "
|
|
"update for this IP already in flight\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits));
|
|
return -1;
|
|
}
|
|
|
|
ret = ctdb_vnn_assign_iface(ctdb, vnn);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
|
|
"assign a usable interface\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits));
|
|
return -1;
|
|
}
|
|
|
|
state = talloc(vnn, struct ctdb_do_takeip_state);
|
|
CTDB_NO_MEMORY(ctdb, state);
|
|
|
|
state->c = talloc_steal(ctdb, c);
|
|
state->vnn = vnn;
|
|
|
|
vnn->update_in_flight = true;
|
|
talloc_set_destructor(state, ctdb_takeip_destructor);
|
|
|
|
DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn)));
|
|
|
|
ret = ctdb_event_script_callback(ctdb,
|
|
state,
|
|
ctdb_do_takeip_callback,
|
|
state,
|
|
CTDB_EVENT_TAKE_IP,
|
|
"%s %s %u",
|
|
ctdb_vnn_iface_string(vnn),
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits);
|
|
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn)));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* State handed to ctdb_do_updateip_callback() for one update-IP
 * (interface move) request. */
struct ctdb_do_updateip_state {
	/* the control to reply to once the event has finished */
	struct ctdb_req_control_old *c;
	/* the interface the address is being moved away from */
	struct ctdb_interface *old;
	/* the VNN being moved */
	struct ctdb_vnn *vnn;
};
|
|
|
|
/*
|
|
called when updateip event finishes
|
|
*/
|
|
static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
|
|
void *private_data)
|
|
{
|
|
struct ctdb_do_updateip_state *state =
|
|
talloc_get_type(private_data, struct ctdb_do_updateip_state);
|
|
int32_t ret;
|
|
|
|
if (status != 0) {
|
|
if (status == -ETIME) {
|
|
ctdb_ban_self(ctdb);
|
|
}
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
|
|
ctdb_addr_to_str(&state->vnn->public_address),
|
|
state->old->name,
|
|
ctdb_vnn_iface_string(state->vnn)));
|
|
|
|
/*
|
|
* All we can do is reset the old interface
|
|
* and let the next run fix it
|
|
*/
|
|
ctdb_vnn_unassign_iface(ctdb, state->vnn);
|
|
state->vnn->iface = state->old;
|
|
state->vnn->iface->references++;
|
|
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
if (ctdb->do_checkpublicip) {
|
|
|
|
ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
|
|
if (ret != 0) {
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
}
|
|
|
|
/* the control succeeded */
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
|
|
static int ctdb_updateip_destructor(struct ctdb_do_updateip_state *state)
|
|
{
|
|
state->vnn->update_in_flight = false;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
update (move) an ip address
|
|
*/
|
|
static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
struct ctdb_vnn *vnn)
|
|
{
|
|
int ret;
|
|
struct ctdb_do_updateip_state *state;
|
|
struct ctdb_interface *old = vnn->iface;
|
|
const char *new_name;
|
|
|
|
if (vnn->update_in_flight) {
|
|
DEBUG(DEBUG_NOTICE,("Update of IP %s/%u rejected "
|
|
"update for this IP already in flight\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits));
|
|
return -1;
|
|
}
|
|
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
ret = ctdb_vnn_assign_iface(ctdb, vnn);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
|
|
"assin a usable interface (old iface '%s')\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
old->name));
|
|
return -1;
|
|
}
|
|
|
|
new_name = ctdb_vnn_iface_string(vnn);
|
|
if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
|
|
/* A benign update from one interface onto itself.
|
|
* no need to run the eventscripts in this case, just return
|
|
* success.
|
|
*/
|
|
ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
|
|
return 0;
|
|
}
|
|
|
|
state = talloc(vnn, struct ctdb_do_updateip_state);
|
|
CTDB_NO_MEMORY(ctdb, state);
|
|
|
|
state->c = talloc_steal(ctdb, c);
|
|
state->old = old;
|
|
state->vnn = vnn;
|
|
|
|
vnn->update_in_flight = true;
|
|
talloc_set_destructor(state, ctdb_updateip_destructor);
|
|
|
|
DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
|
|
"interface %s to %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
old->name,
|
|
new_name));
|
|
|
|
ret = ctdb_event_script_callback(ctdb,
|
|
state,
|
|
ctdb_do_updateip_callback,
|
|
state,
|
|
CTDB_EVENT_UPDATE_IP,
|
|
"%s %s %s %u",
|
|
state->old->name,
|
|
new_name,
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
old->name, new_name));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
Find the vnn of the node that has a public ip address
|
|
returns -1 if the address is not known as a public address
|
|
*/
|
|
static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
|
|
{
|
|
struct ctdb_vnn *vnn;
|
|
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
if (ctdb_same_ip(&vnn->public_address, addr)) {
|
|
return vnn;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
take over an ip address
|
|
*/
|
|
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA indata,
|
|
bool *async_reply)
|
|
{
|
|
int ret;
|
|
struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
|
|
struct ctdb_vnn *vnn;
|
|
bool have_ip = false;
|
|
bool do_updateip = false;
|
|
bool do_takeip = false;
|
|
struct ctdb_interface *best_iface = NULL;
|
|
|
|
if (pip->pnn != ctdb->pnn) {
|
|
DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
|
|
"with pnn %d, but we're node %d\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
pip->pnn, ctdb->pnn));
|
|
return -1;
|
|
}
|
|
|
|
/* update out vnn list */
|
|
vnn = find_public_ip_vnn(ctdb, &pip->addr);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
|
|
ctdb_addr_to_str(&pip->addr)));
|
|
return 0;
|
|
}
|
|
|
|
if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
|
|
have_ip = ctdb_sys_have_ip(&pip->addr);
|
|
}
|
|
best_iface = ctdb_vnn_best_iface(ctdb, vnn);
|
|
if (best_iface == NULL) {
|
|
DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
|
|
"a usable interface (old %s, have_ip %d)\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn),
|
|
have_ip));
|
|
return -1;
|
|
}
|
|
|
|
if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
|
|
DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
|
|
have_ip = false;
|
|
}
|
|
|
|
|
|
if (vnn->iface == NULL && have_ip) {
|
|
DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
|
|
"but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
|
|
ctdb_addr_to_str(&vnn->public_address)));
|
|
return 0;
|
|
}
|
|
|
|
if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
|
|
DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
|
|
"and we have it on iface[%s], but it was assigned to node %d"
|
|
"and we are node %d, banning ourself\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
|
|
ctdb_ban_self(ctdb);
|
|
return -1;
|
|
}
|
|
|
|
if (vnn->pnn == -1 && have_ip) {
|
|
vnn->pnn = ctdb->pnn;
|
|
DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
|
|
"and we already have it on iface[%s], update local daemon\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
ctdb_vnn_iface_string(vnn)));
|
|
return 0;
|
|
}
|
|
|
|
if (vnn->iface) {
|
|
if (vnn->iface != best_iface) {
|
|
if (!vnn->iface->link_up) {
|
|
do_updateip = true;
|
|
} else if (vnn->iface->references > (best_iface->references + 1)) {
|
|
/* only move when the rebalance gains something */
|
|
do_updateip = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!have_ip) {
|
|
if (do_updateip) {
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
do_updateip = false;
|
|
}
|
|
do_takeip = true;
|
|
}
|
|
|
|
if (do_takeip) {
|
|
ret = ctdb_do_takeip(ctdb, c, vnn);
|
|
if (ret != 0) {
|
|
return -1;
|
|
}
|
|
} else if (do_updateip) {
|
|
ret = ctdb_do_updateip(ctdb, c, vnn);
|
|
if (ret != 0) {
|
|
return -1;
|
|
}
|
|
} else {
|
|
/*
|
|
* The interface is up and the kernel known the ip
|
|
* => do nothing
|
|
*/
|
|
DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn)));
|
|
return 0;
|
|
}
|
|
|
|
/* tell ctdb_control.c that we will be replying asynchronously */
|
|
*async_reply = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void do_delete_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn)
|
|
{
|
|
DLIST_REMOVE(ctdb->vnn, vnn);
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
ctdb_remove_orphaned_ifaces(ctdb, vnn);
|
|
talloc_free(vnn);
|
|
}
|
|
|
|
/*
|
|
called when releaseip event finishes
|
|
*/
|
|
static void release_ip_callback(struct ctdb_context *ctdb, int status,
|
|
void *private_data)
|
|
{
|
|
struct takeover_callback_state *state =
|
|
talloc_get_type(private_data, struct takeover_callback_state);
|
|
TDB_DATA data;
|
|
|
|
if (status == -ETIME) {
|
|
ctdb_ban_self(ctdb);
|
|
}
|
|
|
|
if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
|
|
if (ctdb_sys_have_ip(state->addr)) {
|
|
DEBUG(DEBUG_ERR,
|
|
("IP %s still hosted during release IP callback, failing\n",
|
|
ctdb_addr_to_str(state->addr)));
|
|
ctdb_request_control_reply(ctdb, state->c,
|
|
NULL, -1, NULL);
|
|
talloc_free(state);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* send a message to all clients of this node telling them
|
|
that the cluster has been reconfigured and they should
|
|
release any sockets on this IP */
|
|
data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
|
|
CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
|
|
data.dsize = strlen((char *)data.dptr)+1;
|
|
|
|
DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
|
|
|
|
ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
|
|
|
|
ctdb_vnn_unassign_iface(ctdb, state->vnn);
|
|
|
|
/* Process the IP if it has been marked for deletion */
|
|
if (state->vnn->delete_pending) {
|
|
do_delete_ip(ctdb, state->vnn);
|
|
state->vnn = NULL;
|
|
}
|
|
|
|
/* the control succeeded */
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
|
|
talloc_free(state);
|
|
}
|
|
|
|
static int ctdb_releaseip_destructor(struct takeover_callback_state *state)
|
|
{
|
|
if (state->vnn != NULL) {
|
|
state->vnn->update_in_flight = false;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
release an ip address
|
|
*/
|
|
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA indata,
|
|
bool *async_reply)
|
|
{
|
|
int ret;
|
|
struct takeover_callback_state *state;
|
|
struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
|
|
struct ctdb_vnn *vnn;
|
|
char *iface;
|
|
|
|
/* update our vnn list */
|
|
vnn = find_public_ip_vnn(ctdb, &pip->addr);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
|
|
ctdb_addr_to_str(&pip->addr)));
|
|
return 0;
|
|
}
|
|
vnn->pnn = pip->pnn;
|
|
|
|
/* stop any previous arps */
|
|
talloc_free(vnn->takeover_ctx);
|
|
vnn->takeover_ctx = NULL;
|
|
|
|
/* Some ctdb tool commands (e.g. moveip) send
|
|
* lazy multicast to drop an IP from any node that isn't the
|
|
* intended new node. The following causes makes ctdbd ignore
|
|
* a release for any address it doesn't host.
|
|
*/
|
|
if (ctdb->tunable.disable_ip_failover == 0 && ctdb->do_checkpublicip) {
|
|
if (!ctdb_sys_have_ip(&pip->addr)) {
|
|
DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn)));
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
return 0;
|
|
}
|
|
} else {
|
|
if (vnn->iface == NULL) {
|
|
DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u (ip not held)\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
vnn->public_netmask_bits));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* There is a potential race between take_ip and us because we
|
|
* update the VNN via a callback that run when the
|
|
* eventscripts have been run. Avoid the race by allowing one
|
|
* update to be in flight at a time.
|
|
*/
|
|
if (vnn->update_in_flight) {
|
|
DEBUG(DEBUG_NOTICE,("Release of IP %s/%u rejected "
|
|
"update for this IP already in flight\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits));
|
|
return -1;
|
|
}
|
|
|
|
iface = strdup(ctdb_vnn_iface_string(vnn));
|
|
|
|
DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s node:%d\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
vnn->public_netmask_bits,
|
|
iface,
|
|
pip->pnn));
|
|
|
|
state = talloc(ctdb, struct takeover_callback_state);
|
|
if (state == NULL) {
|
|
ctdb_set_error(ctdb, "Out of memory at %s:%d",
|
|
__FILE__, __LINE__);
|
|
free(iface);
|
|
return -1;
|
|
}
|
|
|
|
state->c = talloc_steal(state, c);
|
|
state->addr = talloc(state, ctdb_sock_addr);
|
|
if (state->addr == NULL) {
|
|
ctdb_set_error(ctdb, "Out of memory at %s:%d",
|
|
__FILE__, __LINE__);
|
|
free(iface);
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
*state->addr = pip->addr;
|
|
state->vnn = vnn;
|
|
|
|
vnn->update_in_flight = true;
|
|
talloc_set_destructor(state, ctdb_releaseip_destructor);
|
|
|
|
ret = ctdb_event_script_callback(ctdb,
|
|
state, release_ip_callback, state,
|
|
CTDB_EVENT_RELEASE_IP,
|
|
"%s %s %u",
|
|
iface,
|
|
ctdb_addr_to_str(&pip->addr),
|
|
vnn->public_netmask_bits);
|
|
free(iface);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
|
|
ctdb_addr_to_str(&pip->addr),
|
|
ctdb_vnn_iface_string(vnn)));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
|
|
/* tell the control that we will be reply asynchronously */
|
|
*async_reply = true;
|
|
return 0;
|
|
}
|
|
|
|
static int ctdb_add_public_address(struct ctdb_context *ctdb,
|
|
ctdb_sock_addr *addr,
|
|
unsigned mask, const char *ifaces,
|
|
bool check_address)
|
|
{
|
|
struct ctdb_vnn *vnn;
|
|
uint32_t num = 0;
|
|
char *tmp;
|
|
const char *iface;
|
|
int i;
|
|
int ret;
|
|
|
|
tmp = strdup(ifaces);
|
|
for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
|
|
if (!ctdb_sys_check_iface_exists(iface)) {
|
|
DEBUG(DEBUG_CRIT,("Interface %s does not exist. Can not add public-address : %s\n", iface, ctdb_addr_to_str(addr)));
|
|
free(tmp);
|
|
return -1;
|
|
}
|
|
}
|
|
free(tmp);
|
|
|
|
/* Verify that we don't have an entry for this ip yet */
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
|
|
DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
|
|
ctdb_addr_to_str(addr)));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* create a new vnn structure for this ip address */
|
|
vnn = talloc_zero(ctdb, struct ctdb_vnn);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, vnn);
|
|
vnn->ifaces = talloc_array(vnn, const char *, num + 2);
|
|
tmp = talloc_strdup(vnn, ifaces);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, tmp);
|
|
for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
|
|
vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
|
|
vnn->ifaces[num] = talloc_strdup(vnn, iface);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
|
|
num++;
|
|
}
|
|
talloc_free(tmp);
|
|
vnn->ifaces[num] = NULL;
|
|
vnn->public_address = *addr;
|
|
vnn->public_netmask_bits = mask;
|
|
vnn->pnn = -1;
|
|
if (check_address) {
|
|
if (ctdb_sys_have_ip(addr)) {
|
|
DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
|
|
vnn->pnn = ctdb->pnn;
|
|
}
|
|
}
|
|
|
|
for (i=0; vnn->ifaces[i]; i++) {
|
|
ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
|
|
"for public_address[%s]\n",
|
|
vnn->ifaces[i], ctdb_addr_to_str(addr)));
|
|
talloc_free(vnn);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
DLIST_ADD(ctdb->vnn, vnn);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
setup the public address lists from a file
|
|
*/
|
|
int ctdb_set_public_addresses(struct ctdb_context *ctdb, bool check_addresses)
|
|
{
|
|
char **lines;
|
|
int nlines;
|
|
int i;
|
|
|
|
lines = file_lines_load(ctdb->public_addresses_file, &nlines, 0, ctdb);
|
|
if (lines == NULL) {
|
|
ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", ctdb->public_addresses_file);
|
|
return -1;
|
|
}
|
|
while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
|
|
nlines--;
|
|
}
|
|
|
|
for (i=0;i<nlines;i++) {
|
|
unsigned mask;
|
|
ctdb_sock_addr addr;
|
|
const char *addrstr;
|
|
const char *ifaces;
|
|
char *tok, *line;
|
|
|
|
line = lines[i];
|
|
while ((*line == ' ') || (*line == '\t')) {
|
|
line++;
|
|
}
|
|
if (*line == '#') {
|
|
continue;
|
|
}
|
|
if (strcmp(line, "") == 0) {
|
|
continue;
|
|
}
|
|
tok = strtok(line, " \t");
|
|
addrstr = tok;
|
|
tok = strtok(NULL, " \t");
|
|
if (tok == NULL) {
|
|
if (NULL == ctdb->default_public_interface) {
|
|
DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
|
|
i+1));
|
|
talloc_free(lines);
|
|
return -1;
|
|
}
|
|
ifaces = ctdb->default_public_interface;
|
|
} else {
|
|
ifaces = tok;
|
|
}
|
|
|
|
if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
|
|
DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
|
|
talloc_free(lines);
|
|
return -1;
|
|
}
|
|
if (ctdb_add_public_address(ctdb, &addr, mask, ifaces, check_addresses)) {
|
|
DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
|
|
talloc_free(lines);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
|
|
talloc_free(lines);
|
|
return 0;
|
|
}
|
|
|
|
int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
|
|
const char *iface,
|
|
const char *ip)
|
|
{
|
|
struct ctdb_vnn *svnn;
|
|
struct ctdb_interface *cur = NULL;
|
|
bool ok;
|
|
int ret;
|
|
|
|
svnn = talloc_zero(ctdb, struct ctdb_vnn);
|
|
CTDB_NO_MEMORY(ctdb, svnn);
|
|
|
|
svnn->ifaces = talloc_array(svnn, const char *, 2);
|
|
CTDB_NO_MEMORY(ctdb, svnn->ifaces);
|
|
svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
|
|
CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
|
|
svnn->ifaces[1] = NULL;
|
|
|
|
ok = parse_ip(ip, iface, 0, &svnn->public_address);
|
|
if (!ok) {
|
|
talloc_free(svnn);
|
|
return -1;
|
|
}
|
|
|
|
ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
|
|
"for single_ip[%s]\n",
|
|
svnn->ifaces[0],
|
|
ctdb_addr_to_str(&svnn->public_address)));
|
|
talloc_free(svnn);
|
|
return -1;
|
|
}
|
|
|
|
/* assume the single public ip interface is initially "good" */
|
|
cur = ctdb_find_iface(ctdb, iface);
|
|
if (cur == NULL) {
|
|
DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
|
|
return -1;
|
|
}
|
|
cur->link_up = true;
|
|
|
|
ret = ctdb_vnn_assign_iface(ctdb, svnn);
|
|
if (ret != 0) {
|
|
talloc_free(svnn);
|
|
return -1;
|
|
}
|
|
|
|
ctdb->single_ip_vnn = svnn;
|
|
return 0;
|
|
}
|
|
|
|
static void *add_ip_callback(void *parm, void *data)
|
|
{
|
|
struct public_ip_list *this_ip = parm;
|
|
struct public_ip_list *prev_ip = data;
|
|
|
|
if (prev_ip == NULL) {
|
|
return parm;
|
|
}
|
|
if (this_ip->pnn == -1) {
|
|
this_ip->pnn = prev_ip->pnn;
|
|
}
|
|
|
|
return parm;
|
|
}
|
|
|
|
static int getips_count_callback(void *param, void *data)
|
|
{
|
|
struct public_ip_list **ip_list = (struct public_ip_list **)param;
|
|
struct public_ip_list *new_ip = (struct public_ip_list *)data;
|
|
|
|
new_ip->next = *ip_list;
|
|
*ip_list = new_ip;
|
|
return 0;
|
|
}
|
|
|
|
static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
|
|
struct ctdb_public_ip_list *ips,
|
|
uint32_t pnn);
|
|
|
|
static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
|
|
struct ipalloc_state *ipalloc_state,
|
|
struct ctdb_node_map_old *nodemap)
|
|
{
|
|
int j;
|
|
int ret;
|
|
struct ctdb_public_ip_list_old *ip_list;
|
|
|
|
if (ipalloc_state->num != nodemap->num) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__
|
|
" ipalloc_state->num (%d) != nodemap->num (%d) invalid param\n",
|
|
ipalloc_state->num, nodemap->num));
|
|
return -1;
|
|
}
|
|
|
|
for (j=0; j<nodemap->num; j++) {
|
|
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
|
continue;
|
|
}
|
|
|
|
/* Retrieve the list of known public IPs from the node */
|
|
ret = ctdb_ctrl_get_public_ips_flags(ctdb,
|
|
TAKEOVER_TIMEOUT(),
|
|
j,
|
|
ipalloc_state->known_public_ips,
|
|
0,
|
|
&ip_list);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,
|
|
("Failed to read known public IPs from node: %u\n",
|
|
j));
|
|
return -1;
|
|
}
|
|
ipalloc_state->known_public_ips[j].num = ip_list->num;
|
|
/* This could be copied and freed. However, ip_list
|
|
* is allocated off ipalloc_state->known_public_ips,
|
|
* so this is a safe hack. This will go away in a
|
|
* while anyway... */
|
|
ipalloc_state->known_public_ips[j].ip = &ip_list->ips[0];
|
|
|
|
if (ctdb->do_checkpublicip) {
|
|
verify_remote_ip_allocation(
|
|
ctdb,
|
|
&ipalloc_state->known_public_ips[j],
|
|
j);
|
|
}
|
|
|
|
/* Retrieve the list of available public IPs from the node */
|
|
ret = ctdb_ctrl_get_public_ips_flags(ctdb,
|
|
TAKEOVER_TIMEOUT(),
|
|
j,
|
|
ipalloc_state->available_public_ips,
|
|
CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE,
|
|
&ip_list);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,
|
|
("Failed to read available public IPs from node: %u\n",
|
|
j));
|
|
return -1;
|
|
}
|
|
ipalloc_state->available_public_ips[j].num = ip_list->num;
|
|
/* This could be copied and freed. However, ip_list
|
|
* is allocated off ipalloc_state->available_public_ips,
|
|
* so this is a safe hack. This will go away in a
|
|
* while anyway... */
|
|
ipalloc_state->available_public_ips[j].ip = &ip_list->ips[0];
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct public_ip_list *
|
|
create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_state)
|
|
{
|
|
int i, j;
|
|
struct public_ip_list *ip_list;
|
|
struct ctdb_public_ip_list *public_ips;
|
|
|
|
TALLOC_FREE(ctdb->ip_tree);
|
|
ctdb->ip_tree = trbt_create(ctdb, 0);
|
|
|
|
for (i=0; i < ctdb->num_nodes; i++) {
|
|
|
|
if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
|
|
continue;
|
|
}
|
|
|
|
/* there were no public ips for this node */
|
|
if (ipalloc_state->known_public_ips == NULL) {
|
|
continue;
|
|
}
|
|
|
|
public_ips = &ipalloc_state->known_public_ips[i];
|
|
|
|
for (j=0; j < public_ips->num; j++) {
|
|
struct public_ip_list *tmp_ip;
|
|
|
|
tmp_ip = talloc_zero(ctdb->ip_tree, struct public_ip_list);
|
|
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
|
|
/* Do not use information about IP addresses hosted
|
|
* on other nodes, it may not be accurate */
|
|
if (public_ips->ip[j].pnn == ctdb->nodes[i]->pnn) {
|
|
tmp_ip->pnn = public_ips->ip[j].pnn;
|
|
} else {
|
|
tmp_ip->pnn = -1;
|
|
}
|
|
tmp_ip->addr = public_ips->ip[j].addr;
|
|
tmp_ip->next = NULL;
|
|
|
|
trbt_insertarray32_callback(ctdb->ip_tree,
|
|
IP_KEYLEN, ip_key(&public_ips->ip[j].addr),
|
|
add_ip_callback,
|
|
tmp_ip);
|
|
}
|
|
}
|
|
|
|
ip_list = NULL;
|
|
trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
|
|
|
|
return ip_list;
|
|
}
|
|
|
|
static bool all_nodes_are_disabled(struct ctdb_node_map_old *nodemap)
|
|
{
|
|
int i;
|
|
|
|
for (i=0;i<nodemap->num;i++) {
|
|
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
|
|
/* Found one completely healthy node */
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
  State shared between get_tunable_from_nodes() and its per-node
  reply/failure callbacks.
*/
struct get_tunable_callback_data {
	/* Name of the tunable being fetched, used in log messages */
	const char *tunable;
	/* talloc array receiving one value per node, indexed by PNN */
	uint32_t *out;
	/* Set by the callbacks when the whole fetch must be abandoned */
	bool fatal;
};
|
|
|
|
/*
  Success callback for the GET_TUNABLE control: store the value
  returned by node pnn in cd->out[pnn].

  A reply of the wrong size marks the whole fetch as fatal.  A reply
  from a PNN beyond the end of the output array is logged and ignored.
  Non-zero results are left to the failure callback.
*/
static void get_tunable_callback(struct ctdb_context *ctdb, uint32_t pnn,
				 int32_t res, TDB_DATA outdata,
				 void *callback)
{
	struct get_tunable_callback_data *cd = callback;
	int size;

	if (res != 0) {
		/* Already handled in fail callback */
		return;
	}

	if (outdata.dsize == sizeof(uint32_t)) {
		size = talloc_array_length(cd->out);
		if (pnn < size) {
			cd->out[pnn] = *(uint32_t *)outdata.dptr;
			return;
		}

		DEBUG(DEBUG_ERR,("Got %s reply from node %d but nodemap only has %d entries\n",
				 cd->tunable, pnn, size));
		return;
	}

	DEBUG(DEBUG_ERR,("Wrong size of returned data when reading \"%s\" tunable from node %d. Expected %d bytes but received %d bytes\n",
			 cd->tunable, pnn, (int)sizeof(uint32_t),
			 (int)outdata.dsize));
	cd->fatal = true;
}
|
|
|
|
/*
  Failure callback for the GET_TUNABLE control.

  -EINVAL and -1 are logged as "tunable not implemented" and are not
  fatal.  A timeout (-ETIME) and any other result are logged as errors
  and mark the fetch as fatal.
*/
static void get_tunable_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
				      int32_t res, TDB_DATA outdata,
				      void *callback)
{
	struct get_tunable_callback_data *cd = callback;

	if (res == -EINVAL || res == -1) {
		DEBUG(DEBUG_WARNING,
		      ("Tunable \"%s\" not implemented on node %d\n",
		       cd->tunable, pnn));
		return;
	}

	if (res == -ETIME) {
		DEBUG(DEBUG_ERR,
		      ("Timed out getting tunable \"%s\" from node %d\n",
		       cd->tunable, pnn));
	} else {
		DEBUG(DEBUG_ERR,
		      ("Unexpected error getting tunable \"%s\" from node %d\n",
		       cd->tunable, pnn));
	}
	cd->fatal = true;
}
|
|
|
|
static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
|
|
TALLOC_CTX *tmp_ctx,
|
|
struct ctdb_node_map_old *nodemap,
|
|
const char *tunable,
|
|
uint32_t default_value)
|
|
{
|
|
TDB_DATA data;
|
|
struct ctdb_control_get_tunable *t;
|
|
uint32_t *nodes;
|
|
uint32_t *tvals;
|
|
struct get_tunable_callback_data callback_data;
|
|
int i;
|
|
|
|
tvals = talloc_array(tmp_ctx, uint32_t, nodemap->num);
|
|
CTDB_NO_MEMORY_NULL(ctdb, tvals);
|
|
for (i=0; i<nodemap->num; i++) {
|
|
tvals[i] = default_value;
|
|
}
|
|
|
|
callback_data.out = tvals;
|
|
callback_data.tunable = tunable;
|
|
callback_data.fatal = false;
|
|
|
|
data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(tunable) + 1;
|
|
data.dptr = talloc_size(tmp_ctx, data.dsize);
|
|
t = (struct ctdb_control_get_tunable *)data.dptr;
|
|
t->length = strlen(tunable)+1;
|
|
memcpy(t->name, tunable, t->length);
|
|
nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
|
|
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_TUNABLE,
|
|
nodes, 0, TAKEOVER_TIMEOUT(),
|
|
false, data,
|
|
get_tunable_callback,
|
|
get_tunable_fail_callback,
|
|
&callback_data) != 0) {
|
|
if (callback_data.fatal) {
|
|
talloc_free(tvals);
|
|
tvals = NULL;
|
|
}
|
|
}
|
|
talloc_free(nodes);
|
|
talloc_free(data.dptr);
|
|
|
|
return tvals;
|
|
}
|
|
|
|
/* Set internal flags for IP allocation:
|
|
* Clear ip flags
|
|
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
|
|
* Set NOIPHOST ip flag for each INACTIVE node
|
|
* if all nodes are disabled:
|
|
* Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
|
|
* else
|
|
* Set NOIPHOST ip flags for disabled nodes
|
|
*/
|
|
static void set_ipflags_internal(struct ipalloc_state *ipalloc_state,
|
|
struct ctdb_node_map_old *nodemap,
|
|
uint32_t *tval_noiptakeover,
|
|
uint32_t *tval_noiphostonalldisabled)
|
|
{
|
|
int i;
|
|
|
|
for (i=0;i<nodemap->num;i++) {
|
|
/* Can not take IPs on node with NoIPTakeover set */
|
|
if (tval_noiptakeover[i] != 0) {
|
|
ipalloc_state->noiptakeover[i] = true;
|
|
}
|
|
|
|
/* Can not host IPs on INACTIVE node */
|
|
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
|
|
ipalloc_state->noiphost[i] = true;
|
|
}
|
|
}
|
|
|
|
if (all_nodes_are_disabled(nodemap)) {
|
|
/* If all nodes are disabled, can not host IPs on node
|
|
* with NoIPHostOnAllDisabled set
|
|
*/
|
|
for (i=0;i<nodemap->num;i++) {
|
|
if (tval_noiphostonalldisabled[i] != 0) {
|
|
ipalloc_state->noiphost[i] = true;
|
|
}
|
|
}
|
|
} else {
|
|
/* If some nodes are not disabled, then can not host
|
|
* IPs on DISABLED node
|
|
*/
|
|
for (i=0;i<nodemap->num;i++) {
|
|
if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
|
|
ipalloc_state->noiphost[i] = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
  Fetch the NoIPTakeover and NoIPHostOnAllDisabled tunables from the
  nodes and turn them, together with the node flags, into the
  per-node noiptakeover/noiphost flags of ipalloc_state.

  Returns false if either tunable cannot be fetched, true otherwise.
  The temporary arrays are allocated off ipalloc_state, so anything
  left behind on failure goes away when the caller frees that.
*/
static bool set_ipflags(struct ctdb_context *ctdb,
			struct ipalloc_state *ipalloc_state,
			struct ctdb_node_map_old *nodemap)
{
	uint32_t *takeover_vals;
	uint32_t *alldisabled_vals;

	takeover_vals = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
					       "NoIPTakeover", 0);
	if (!takeover_vals) {
		return false;
	}

	alldisabled_vals = get_tunable_from_nodes(ctdb, ipalloc_state, nodemap,
						  "NoIPHostOnAllDisabled", 0);
	if (!alldisabled_vals) {
		/* Caller frees tmp_ctx */
		return false;
	}

	set_ipflags_internal(ipalloc_state, nodemap,
			     takeover_vals, alldisabled_vals);

	talloc_free(takeover_vals);
	talloc_free(alldisabled_vals);

	return true;
}
|
|
|
|
static struct ipalloc_state * ipalloc_state_init(struct ctdb_context *ctdb,
|
|
TALLOC_CTX *mem_ctx)
|
|
{
|
|
struct ipalloc_state *ipalloc_state =
|
|
talloc_zero(mem_ctx, struct ipalloc_state);
|
|
if (ipalloc_state == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
|
|
return NULL;
|
|
}
|
|
|
|
ipalloc_state->num = ctdb->num_nodes;
|
|
|
|
ipalloc_state->known_public_ips =
|
|
talloc_zero_array(ipalloc_state,
|
|
struct ctdb_public_ip_list,
|
|
ipalloc_state->num);
|
|
if (ipalloc_state->known_public_ips == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
|
|
goto fail;
|
|
}
|
|
|
|
ipalloc_state->available_public_ips =
|
|
talloc_zero_array(ipalloc_state,
|
|
struct ctdb_public_ip_list,
|
|
ipalloc_state->num);
|
|
if (ipalloc_state->available_public_ips == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
|
|
goto fail;
|
|
}
|
|
ipalloc_state->noiptakeover =
|
|
talloc_zero_array(ipalloc_state,
|
|
bool,
|
|
ipalloc_state->num);
|
|
if (ipalloc_state->noiptakeover == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
|
|
goto fail;
|
|
}
|
|
ipalloc_state->noiphost =
|
|
talloc_zero_array(ipalloc_state,
|
|
bool,
|
|
ipalloc_state->num);
|
|
if (ipalloc_state->noiphost == NULL) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Out of memory\n"));
|
|
goto fail;
|
|
}
|
|
|
|
if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
|
|
ipalloc_state->algorithm = IPALLOC_LCP2;
|
|
} else if (1 == ctdb->tunable.deterministic_public_ips) {
|
|
ipalloc_state->algorithm = IPALLOC_DETERMINISTIC;
|
|
} else {
|
|
ipalloc_state->algorithm = IPALLOC_NONDETERMINISTIC;
|
|
}
|
|
|
|
ipalloc_state->no_ip_failback = ctdb->tunable.no_ip_failback;
|
|
|
|
return ipalloc_state;
|
|
fail:
|
|
talloc_free(ipalloc_state);
|
|
return NULL;
|
|
}
|
|
|
|
struct iprealloc_callback_data {
|
|
bool *retry_nodes;
|
|
int retry_count;
|
|
client_async_callback fail_callback;
|
|
void *fail_callback_data;
|
|
struct ctdb_node_map_old *nodemap;
|
|
};
|
|
|
|
/*
  Failure callback for the IPREALLOCATED control.

  - A PNN outside the retry array is logged and ignored.
  - A failure on an INACTIVE node is logged and ignored.
  - A timeout (-ETIME) is passed on to the caller's failure callback,
    if one was registered.
  - Any other failure flags the node for a retry.
*/
static void iprealloc_fail_callback(struct ctdb_context *ctdb, uint32_t pnn,
				    int32_t res, TDB_DATA outdata,
				    void *callback)
{
	int numnodes;
	struct iprealloc_callback_data *cd =
		(struct iprealloc_callback_data *)callback;

	numnodes = talloc_array_length(cd->retry_nodes);
	/* Valid indexes are 0 .. numnodes-1, so pnn == numnodes must
	 * be rejected as well; it is used as an array index below */
	if (pnn >= numnodes) {
		DEBUG(DEBUG_ERR,
		      ("ipreallocated failure from node %d, "
		       "but only %d nodes in nodemap\n",
		       pnn, numnodes));
		return;
	}

	/* Can't run the "ipreallocated" event on a INACTIVE node */
	if (cd->nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) {
		DEBUG(DEBUG_WARNING,
		      ("ipreallocated failed on inactive node %d, ignoring\n",
		       pnn));
		return;
	}

	switch (res) {
	case -ETIME:
		/* If the control timed out then that's a real error,
		 * so call the real fail callback
		 */
		if (cd->fail_callback) {
			cd->fail_callback(ctdb, pnn, res, outdata,
					  cd->fail_callback_data);
		} else {
			DEBUG(DEBUG_WARNING,
			      ("iprealloc timed out but no callback registered\n"));
		}
		break;
	default:
		/* If not a timeout then either the ipreallocated
		 * eventscript (or some setup) failed.  This might
		 * have failed because the IPREALLOCATED control isn't
		 * implemented - right now there is no way of knowing
		 * because the error codes are all folded down to -1.
		 * Consider retrying using EVENTSCRIPT control...
		 */
		DEBUG(DEBUG_WARNING,
		      ("ipreallocated failure from node %d, flagging retry\n",
		       pnn));
		cd->retry_nodes[pnn] = true;
		cd->retry_count++;
	}
}
|
|
|
|
struct takeover_callback_data {
|
|
bool *node_failed;
|
|
client_async_callback fail_callback;
|
|
void *fail_callback_data;
|
|
struct ctdb_node_map_old *nodemap;
|
|
};
|
|
|
|
static void takeover_run_fail_callback(struct ctdb_context *ctdb,
|
|
uint32_t node_pnn, int32_t res,
|
|
TDB_DATA outdata, void *callback_data)
|
|
{
|
|
struct takeover_callback_data *cd =
|
|
talloc_get_type_abort(callback_data,
|
|
struct takeover_callback_data);
|
|
int i;
|
|
|
|
for (i = 0; i < cd->nodemap->num; i++) {
|
|
if (node_pnn == cd->nodemap->nodes[i].pnn) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (i == cd->nodemap->num) {
|
|
DEBUG(DEBUG_ERR, (__location__ " invalid PNN %u\n", node_pnn));
|
|
return;
|
|
}
|
|
|
|
if (!cd->node_failed[i]) {
|
|
cd->node_failed[i] = true;
|
|
cd->fail_callback(ctdb, node_pnn, res, outdata,
|
|
cd->fail_callback_data);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Recalculate the allocation of public IPs to nodes and have the
|
|
* nodes host their allocated addresses.
|
|
*
|
|
* - Allocate memory for IP allocation state, including per node
|
|
* arrays
|
|
* - Populate IP allocation algorithm in IP allocation state
|
|
* - Populate local value of tunable NoIPFailback in IP allocation
|
|
state - this is really a cluster-wide configuration variable and
|
|
only the value form the master node is used
|
|
* - Retrieve tunables NoIPTakeover and NoIPHostOnAllDisabled from all
|
|
* connected nodes - this is done separately so tunable values can
|
|
* be faked in unit testing
|
|
* - Populate NoIPTakover tunable in IP allocation state
|
|
* - Populate NoIPHost in IP allocation state, derived from node flags
|
|
* and NoIPHostOnAllDisabled tunable
|
|
* - Retrieve and populate known and available IP lists in IP
|
|
* allocation state
|
|
* - If no available IP addresses then early exit
|
|
* - Build list of (known IPs, currently assigned node)
|
|
* - Populate list of nodes to force rebalance - internal structure,
|
|
* currently no way to fetch, only used by LCP2 for nodes that have
|
|
* had new IP addresses added
|
|
* - Run IP allocation algorithm
|
|
* - Send RELEASE_IP to all nodes for IPs they should not host
|
|
* - Send TAKE_IP to all nodes for IPs they should host
|
|
* - Send IPREALLOCATED to all nodes (with backward compatibility hack)
|
|
*/
|
|
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map_old *nodemap,
|
|
uint32_t *force_rebalance_nodes,
|
|
client_async_callback fail_callback, void *callback_data)
|
|
{
|
|
int i, j, ret;
|
|
struct ctdb_public_ip ip;
|
|
uint32_t *nodes;
|
|
struct public_ip_list *all_ips, *tmp_ip;
|
|
TDB_DATA data;
|
|
struct timeval timeout;
|
|
struct client_async_data *async_data;
|
|
struct ctdb_client_control_state *state;
|
|
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
|
struct ipalloc_state *ipalloc_state;
|
|
struct takeover_callback_data *takeover_data;
|
|
struct iprealloc_callback_data iprealloc_data;
|
|
bool *retry_data;
|
|
bool can_host_ips;
|
|
|
|
/*
|
|
* ip failover is completely disabled, just send out the
|
|
* ipreallocated event.
|
|
*/
|
|
if (ctdb->tunable.disable_ip_failover != 0) {
|
|
goto ipreallocated;
|
|
}
|
|
|
|
ipalloc_state = ipalloc_state_init(ctdb, tmp_ctx);
|
|
if (ipalloc_state == NULL) {
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
if (!set_ipflags(ctdb, ipalloc_state, nodemap)) {
|
|
DEBUG(DEBUG_ERR,("Failed to set IP flags - aborting takeover run\n"));
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
/* Fetch known/available public IPs from each active node */
|
|
ret = ctdb_reload_remote_public_ips(ctdb, ipalloc_state, nodemap);
|
|
if (ret != 0) {
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
/* Short-circuit IP allocation if no node has available IPs */
|
|
can_host_ips = false;
|
|
for (i=0; i < ipalloc_state->num; i++) {
|
|
if (ipalloc_state->available_public_ips[i].num != 0) {
|
|
can_host_ips = true;
|
|
}
|
|
}
|
|
if (!can_host_ips) {
|
|
DEBUG(DEBUG_WARNING,("No nodes available to host public IPs yet\n"));
|
|
return 0;
|
|
}
|
|
|
|
/* since nodes only know about those public addresses that
|
|
can be served by that particular node, no single node has
|
|
a full list of all public addresses that exist in the cluster.
|
|
Walk over all node structures and create a merged list of
|
|
all public addresses that exist in the cluster.
|
|
|
|
keep the tree of ips around as ctdb->ip_tree
|
|
*/
|
|
all_ips = create_merged_ip_list(ctdb, ipalloc_state);
|
|
ipalloc_state->all_ips = all_ips;
|
|
|
|
ipalloc_state->force_rebalance_nodes = force_rebalance_nodes;
|
|
|
|
/* Do the IP reassignment calculations */
|
|
ipalloc(ipalloc_state);
|
|
|
|
/* Now tell all nodes to release any public IPs should not
|
|
* host. This will be a NOOP on nodes that don't currently
|
|
* hold the given IP.
|
|
*/
|
|
takeover_data = talloc_zero(tmp_ctx, struct takeover_callback_data);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, takeover_data);
|
|
|
|
takeover_data->node_failed = talloc_zero_array(tmp_ctx,
|
|
bool, nodemap->num);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, takeover_data->node_failed);
|
|
takeover_data->fail_callback = fail_callback;
|
|
takeover_data->fail_callback_data = callback_data;
|
|
takeover_data->nodemap = nodemap;
|
|
|
|
async_data = talloc_zero(tmp_ctx, struct client_async_data);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
|
|
|
async_data->fail_callback = takeover_run_fail_callback;
|
|
async_data->callback_data = takeover_data;
|
|
|
|
ZERO_STRUCT(ip); /* Avoid valgrind warnings for union */
|
|
|
|
/* Send a RELEASE_IP to all nodes that should not be hosting
|
|
* each IP. For each IP, all but one of these will be
|
|
* redundant. However, the redundant ones are used to tell
|
|
* nodes which node should be hosting the IP so that commands
|
|
* like "ctdb ip" can display a particular nodes idea of who
|
|
* is hosting what. */
|
|
for (i=0;i<nodemap->num;i++) {
|
|
/* don't talk to unconnected nodes, but do talk to banned nodes */
|
|
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
|
continue;
|
|
}
|
|
|
|
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
|
if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
|
|
/* This node should be serving this
|
|
vnn so don't tell it to release the ip
|
|
*/
|
|
continue;
|
|
}
|
|
ip.pnn = tmp_ip->pnn;
|
|
ip.addr = tmp_ip->addr;
|
|
|
|
timeout = TAKEOVER_TIMEOUT();
|
|
data.dsize = sizeof(ip);
|
|
data.dptr = (uint8_t *)&ip;
|
|
state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
|
|
0, CTDB_CONTROL_RELEASE_IP, 0,
|
|
data, async_data,
|
|
&timeout, NULL);
|
|
if (state == NULL) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
ctdb_client_async_add(async_data, state);
|
|
}
|
|
}
|
|
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
talloc_free(async_data);
|
|
|
|
|
|
/* For each IP, send a TAKOVER_IP to the node that should be
|
|
* hosting it. Many of these will often be redundant (since
|
|
* the allocation won't have changed) but they can be useful
|
|
* to recover from inconsistencies. */
|
|
async_data = talloc_zero(tmp_ctx, struct client_async_data);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
|
|
|
async_data->fail_callback = fail_callback;
|
|
async_data->callback_data = callback_data;
|
|
|
|
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
|
if (tmp_ip->pnn == -1) {
|
|
/* this IP won't be taken over */
|
|
continue;
|
|
}
|
|
|
|
ip.pnn = tmp_ip->pnn;
|
|
ip.addr = tmp_ip->addr;
|
|
|
|
timeout = TAKEOVER_TIMEOUT();
|
|
data.dsize = sizeof(ip);
|
|
data.dptr = (uint8_t *)&ip;
|
|
state = ctdb_control_send(ctdb, tmp_ip->pnn,
|
|
0, CTDB_CONTROL_TAKEOVER_IP, 0,
|
|
data, async_data, &timeout, NULL);
|
|
if (state == NULL) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
ctdb_client_async_add(async_data, state);
|
|
}
|
|
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
|
|
talloc_free(tmp_ctx);
|
|
return -1;
|
|
}
|
|
|
|
ipreallocated:
|
|
/*
|
|
* Tell all nodes to run eventscripts to process the
|
|
* "ipreallocated" event. This can do a lot of things,
|
|
* including restarting services to reconfigure them if public
|
|
* IPs have moved. Once upon a time this event only used to
|
|
* update natgw.
|
|
*/
|
|
retry_data = talloc_zero_array(tmp_ctx, bool, nodemap->num);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, retry_data);
|
|
iprealloc_data.retry_nodes = retry_data;
|
|
iprealloc_data.retry_count = 0;
|
|
iprealloc_data.fail_callback = fail_callback;
|
|
iprealloc_data.fail_callback_data = callback_data;
|
|
iprealloc_data.nodemap = nodemap;
|
|
|
|
nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
|
|
ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_IPREALLOCATED,
|
|
nodes, 0, TAKEOVER_TIMEOUT(),
|
|
false, tdb_null,
|
|
NULL, iprealloc_fail_callback,
|
|
&iprealloc_data);
|
|
if (ret != 0) {
|
|
/* If the control failed then we should retry to any
|
|
* nodes flagged by iprealloc_fail_callback using the
|
|
* EVENTSCRIPT control. This is a best-effort at
|
|
* backward compatiblity when running a mixed cluster
|
|
* where some nodes have not yet been upgraded to
|
|
* support the IPREALLOCATED control.
|
|
*/
|
|
DEBUG(DEBUG_WARNING,
|
|
("Retry ipreallocated to some nodes using eventscript control\n"));
|
|
|
|
nodes = talloc_array(tmp_ctx, uint32_t,
|
|
iprealloc_data.retry_count);
|
|
CTDB_NO_MEMORY_FATAL(ctdb, nodes);
|
|
|
|
j = 0;
|
|
for (i=0; i<nodemap->num; i++) {
|
|
if (iprealloc_data.retry_nodes[i]) {
|
|
nodes[j] = i;
|
|
j++;
|
|
}
|
|
}
|
|
|
|
data.dptr = discard_const("ipreallocated");
|
|
data.dsize = strlen((char *)data.dptr) + 1;
|
|
ret = ctdb_client_async_control(ctdb,
|
|
CTDB_CONTROL_RUN_EVENTSCRIPTS,
|
|
nodes, 0, TAKEOVER_TIMEOUT(),
|
|
false, data,
|
|
NULL, fail_callback,
|
|
callback_data);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n"));
|
|
}
|
|
}
|
|
|
|
talloc_free(tmp_ctx);
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
destroy a ctdb_client_ip structure
|
|
*/
|
|
static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
|
|
{
|
|
DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
|
|
ctdb_addr_to_str(&ip->addr),
|
|
ntohs(ip->addr.ip.sin_port),
|
|
ip->client_id));
|
|
|
|
DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
called by a client to inform us of a TCP connection that it is managing
|
|
that should tickled with an ACK when IP takeover is done
|
|
*/
|
|
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
|
|
TDB_DATA indata)
|
|
{
|
|
struct ctdb_client *client = reqid_find(ctdb->idr, client_id, struct ctdb_client);
|
|
struct ctdb_connection *tcp_sock = NULL;
|
|
struct ctdb_tcp_list *tcp;
|
|
struct ctdb_connection t;
|
|
int ret;
|
|
TDB_DATA data;
|
|
struct ctdb_client_ip *ip;
|
|
struct ctdb_vnn *vnn;
|
|
ctdb_sock_addr addr;
|
|
|
|
/* If we don't have public IPs, tickles are useless */
|
|
if (ctdb->vnn == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
tcp_sock = (struct ctdb_connection *)indata.dptr;
|
|
|
|
addr = tcp_sock->src;
|
|
ctdb_canonicalize_ip(&addr, &tcp_sock->src);
|
|
addr = tcp_sock->dst;
|
|
ctdb_canonicalize_ip(&addr, &tcp_sock->dst);
|
|
|
|
ZERO_STRUCT(addr);
|
|
memcpy(&addr, &tcp_sock->dst, sizeof(addr));
|
|
vnn = find_public_ip_vnn(ctdb, &addr);
|
|
if (vnn == NULL) {
|
|
switch (addr.sa.sa_family) {
|
|
case AF_INET:
|
|
if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
|
|
DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n",
|
|
ctdb_addr_to_str(&addr)));
|
|
}
|
|
break;
|
|
case AF_INET6:
|
|
DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n",
|
|
ctdb_addr_to_str(&addr)));
|
|
break;
|
|
default:
|
|
DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
if (vnn->pnn != ctdb->pnn) {
|
|
DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
|
|
ctdb_addr_to_str(&addr),
|
|
client_id, client->pid));
|
|
/* failing this call will tell smbd to die */
|
|
return -1;
|
|
}
|
|
|
|
ip = talloc(client, struct ctdb_client_ip);
|
|
CTDB_NO_MEMORY(ctdb, ip);
|
|
|
|
ip->ctdb = ctdb;
|
|
ip->addr = addr;
|
|
ip->client_id = client_id;
|
|
talloc_set_destructor(ip, ctdb_client_ip_destructor);
|
|
DLIST_ADD(ctdb->client_ip_list, ip);
|
|
|
|
tcp = talloc(client, struct ctdb_tcp_list);
|
|
CTDB_NO_MEMORY(ctdb, tcp);
|
|
|
|
tcp->connection.src = tcp_sock->src;
|
|
tcp->connection.dst = tcp_sock->dst;
|
|
|
|
DLIST_ADD(client->tcp_list, tcp);
|
|
|
|
t.src = tcp_sock->src;
|
|
t.dst = tcp_sock->dst;
|
|
|
|
data.dptr = (uint8_t *)&t;
|
|
data.dsize = sizeof(t);
|
|
|
|
switch (addr.sa.sa_family) {
|
|
case AF_INET:
|
|
DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
|
|
(unsigned)ntohs(tcp_sock->dst.ip.sin_port),
|
|
ctdb_addr_to_str(&tcp_sock->src),
|
|
(unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
|
|
break;
|
|
case AF_INET6:
|
|
DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
|
|
(unsigned)ntohs(tcp_sock->dst.ip6.sin6_port),
|
|
ctdb_addr_to_str(&tcp_sock->src),
|
|
(unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
|
|
break;
|
|
default:
|
|
DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
|
|
}
|
|
|
|
|
|
/* tell all nodes about this tcp connection */
|
|
ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
|
|
CTDB_CONTROL_TCP_ADD,
|
|
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
find a tcp address on a list
|
|
*/
|
|
static struct ctdb_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
|
|
struct ctdb_connection *tcp)
|
|
{
|
|
int i;
|
|
|
|
if (array == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
for (i=0;i<array->num;i++) {
|
|
if (ctdb_same_sockaddr(&array->connections[i].src, &tcp->src) &&
|
|
ctdb_same_sockaddr(&array->connections[i].dst, &tcp->dst)) {
|
|
return &array->connections[i];
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
called by a daemon to inform us of a TCP connection that one of its
|
|
clients managing that should tickled with an ACK when IP takeover is
|
|
done
|
|
*/
|
|
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
|
|
{
|
|
struct ctdb_connection *p = (struct ctdb_connection *)indata.dptr;
|
|
struct ctdb_tcp_array *tcparray;
|
|
struct ctdb_connection tcp;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
/* If we don't have public IPs, tickles are useless */
|
|
if (ctdb->vnn == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
vnn = find_public_ip_vnn(ctdb, &p->dst);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
|
|
ctdb_addr_to_str(&p->dst)));
|
|
|
|
return -1;
|
|
}
|
|
|
|
|
|
tcparray = vnn->tcp_array;
|
|
|
|
/* If this is the first tickle */
|
|
if (tcparray == NULL) {
|
|
tcparray = talloc(vnn, struct ctdb_tcp_array);
|
|
CTDB_NO_MEMORY(ctdb, tcparray);
|
|
vnn->tcp_array = tcparray;
|
|
|
|
tcparray->num = 0;
|
|
tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_connection));
|
|
CTDB_NO_MEMORY(ctdb, tcparray->connections);
|
|
|
|
tcparray->connections[tcparray->num].src = p->src;
|
|
tcparray->connections[tcparray->num].dst = p->dst;
|
|
tcparray->num++;
|
|
|
|
if (tcp_update_needed) {
|
|
vnn->tcp_update_needed = true;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Do we already have this tickle ?*/
|
|
tcp.src = p->src;
|
|
tcp.dst = p->dst;
|
|
if (ctdb_tcp_find(tcparray, &tcp) != NULL) {
|
|
DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
|
|
ctdb_addr_to_str(&tcp.dst),
|
|
ntohs(tcp.dst.ip.sin_port),
|
|
vnn->pnn));
|
|
return 0;
|
|
}
|
|
|
|
/* A new tickle, we must add it to the array */
|
|
tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
|
|
struct ctdb_connection,
|
|
tcparray->num+1);
|
|
CTDB_NO_MEMORY(ctdb, tcparray->connections);
|
|
|
|
tcparray->connections[tcparray->num].src = p->src;
|
|
tcparray->connections[tcparray->num].dst = p->dst;
|
|
tcparray->num++;
|
|
|
|
DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
|
|
ctdb_addr_to_str(&tcp.dst),
|
|
ntohs(tcp.dst.ip.sin_port),
|
|
vnn->pnn));
|
|
|
|
if (tcp_update_needed) {
|
|
vnn->tcp_update_needed = true;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void ctdb_remove_connection(struct ctdb_vnn *vnn, struct ctdb_connection *conn)
|
|
{
|
|
struct ctdb_connection *tcpp;
|
|
|
|
if (vnn == NULL) {
|
|
return;
|
|
}
|
|
|
|
/* if the array is empty we cant remove it
|
|
and we don't need to do anything
|
|
*/
|
|
if (vnn->tcp_array == NULL) {
|
|
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
|
|
ctdb_addr_to_str(&conn->dst),
|
|
ntohs(conn->dst.ip.sin_port)));
|
|
return;
|
|
}
|
|
|
|
|
|
/* See if we know this connection
|
|
if we don't know this connection then we dont need to do anything
|
|
*/
|
|
tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
|
|
if (tcpp == NULL) {
|
|
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
|
|
ctdb_addr_to_str(&conn->dst),
|
|
ntohs(conn->dst.ip.sin_port)));
|
|
return;
|
|
}
|
|
|
|
|
|
/* We need to remove this entry from the array.
|
|
Instead of allocating a new array and copying data to it
|
|
we cheat and just copy the last entry in the existing array
|
|
to the entry that is to be removed and just shring the
|
|
->num field
|
|
*/
|
|
*tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
|
|
vnn->tcp_array->num--;
|
|
|
|
/* If we deleted the last entry we also need to remove the entire array
|
|
*/
|
|
if (vnn->tcp_array->num == 0) {
|
|
talloc_free(vnn->tcp_array);
|
|
vnn->tcp_array = NULL;
|
|
}
|
|
|
|
vnn->tcp_update_needed = true;
|
|
|
|
DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
|
|
ctdb_addr_to_str(&conn->src),
|
|
ntohs(conn->src.ip.sin_port)));
|
|
}
|
|
|
|
|
|
/*
|
|
called by a daemon to inform us of a TCP connection that one of its
|
|
clients used are no longer needed in the tickle database
|
|
*/
|
|
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
|
|
{
|
|
struct ctdb_vnn *vnn;
|
|
struct ctdb_connection *conn = (struct ctdb_connection *)indata.dptr;
|
|
|
|
/* If we don't have public IPs, tickles are useless */
|
|
if (ctdb->vnn == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
vnn = find_public_ip_vnn(ctdb, &conn->dst);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__ " unable to find public address %s\n",
|
|
ctdb_addr_to_str(&conn->dst)));
|
|
return 0;
|
|
}
|
|
|
|
ctdb_remove_connection(vnn, conn);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Called when another daemon starts - causes all tickles for all
|
|
public addresses we are serving to be sent to the new node on the
|
|
next check. This actually causes the next scheduled call to
|
|
tdb_update_tcp_tickles() to update all nodes. This is simple and
|
|
doesn't require careful error handling.
|
|
*/
|
|
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t pnn)
|
|
{
|
|
struct ctdb_vnn *vnn;
|
|
|
|
DEBUG(DEBUG_INFO, ("Received startup control from node %lu\n",
|
|
(unsigned long) pnn));
|
|
|
|
for (vnn = ctdb->vnn; vnn != NULL; vnn = vnn->next) {
|
|
vnn->tcp_update_needed = true;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
called when a client structure goes away - hook to remove
|
|
elements from the tcp_list in all daemons
|
|
*/
|
|
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
|
|
{
|
|
while (client->tcp_list) {
|
|
struct ctdb_vnn *vnn;
|
|
struct ctdb_tcp_list *tcp = client->tcp_list;
|
|
struct ctdb_connection *conn = &tcp->connection;
|
|
|
|
DLIST_REMOVE(client->tcp_list, tcp);
|
|
|
|
vnn = find_public_ip_vnn(client->ctdb,
|
|
&conn->dst);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__ " unable to find public address %s\n",
|
|
ctdb_addr_to_str(&conn->dst)));
|
|
continue;
|
|
}
|
|
|
|
/* If the IP address is hosted on this node then
|
|
* remove the connection. */
|
|
if (vnn->pnn == client->ctdb->pnn) {
|
|
ctdb_remove_connection(vnn, conn);
|
|
}
|
|
|
|
/* Otherwise this function has been called because the
|
|
* server IP address has been released to another node
|
|
* and the client has exited. This means that we
|
|
* should not delete the connection information. The
|
|
* takeover node processes connections too. */
|
|
}
|
|
}
|
|
|
|
|
|
void ctdb_release_all_ips(struct ctdb_context *ctdb)
|
|
{
|
|
struct ctdb_vnn *vnn;
|
|
int count = 0;
|
|
TDB_DATA data;
|
|
|
|
if (ctdb->tunable.disable_ip_failover == 1) {
|
|
return;
|
|
}
|
|
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
if (!ctdb_sys_have_ip(&vnn->public_address)) {
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
continue;
|
|
}
|
|
if (!vnn->iface) {
|
|
continue;
|
|
}
|
|
|
|
/* Don't allow multiple releases at once. Some code,
|
|
* particularly ctdb_tickle_sentenced_connections() is
|
|
* not re-entrant */
|
|
if (vnn->update_in_flight) {
|
|
DEBUG(DEBUG_WARNING,
|
|
(__location__
|
|
" Not releasing IP %s/%u on interface %s, an update is already in progess\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn)));
|
|
continue;
|
|
}
|
|
vnn->update_in_flight = true;
|
|
|
|
DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n",
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits,
|
|
ctdb_vnn_iface_string(vnn)));
|
|
|
|
ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
|
|
ctdb_vnn_iface_string(vnn),
|
|
ctdb_addr_to_str(&vnn->public_address),
|
|
vnn->public_netmask_bits);
|
|
|
|
data.dptr = (uint8_t *)talloc_strdup(
|
|
vnn, ctdb_addr_to_str(&vnn->public_address));
|
|
if (data.dptr != NULL) {
|
|
data.dsize = strlen((char *)data.dptr) + 1;
|
|
ctdb_daemon_send_message(ctdb, ctdb->pnn,
|
|
CTDB_SRVID_RELEASE_IP, data);
|
|
talloc_free(data.dptr);
|
|
}
|
|
|
|
ctdb_vnn_unassign_iface(ctdb, vnn);
|
|
vnn->update_in_flight = false;
|
|
count++;
|
|
}
|
|
|
|
DEBUG(DEBUG_NOTICE,(__location__ " Released %d public IPs\n", count));
|
|
}
|
|
|
|
|
|
/*
|
|
get list of public IPs
|
|
*/
|
|
int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c, TDB_DATA *outdata)
|
|
{
|
|
int i, num, len;
|
|
struct ctdb_public_ip_list_old *ips;
|
|
struct ctdb_vnn *vnn;
|
|
bool only_available = false;
|
|
|
|
if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
|
|
only_available = true;
|
|
}
|
|
|
|
/* count how many public ip structures we have */
|
|
num = 0;
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
num++;
|
|
}
|
|
|
|
len = offsetof(struct ctdb_public_ip_list_old, ips) +
|
|
num*sizeof(struct ctdb_public_ip);
|
|
ips = talloc_zero_size(outdata, len);
|
|
CTDB_NO_MEMORY(ctdb, ips);
|
|
|
|
i = 0;
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
|
|
continue;
|
|
}
|
|
ips->ips[i].pnn = vnn->pnn;
|
|
ips->ips[i].addr = vnn->public_address;
|
|
i++;
|
|
}
|
|
ips->num = i;
|
|
len = offsetof(struct ctdb_public_ip_list_old, ips) +
|
|
i*sizeof(struct ctdb_public_ip);
|
|
|
|
outdata->dsize = len;
|
|
outdata->dptr = (uint8_t *)ips;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA indata,
|
|
TDB_DATA *outdata)
|
|
{
|
|
int i, num, len;
|
|
ctdb_sock_addr *addr;
|
|
struct ctdb_public_ip_info_old *info;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
addr = (ctdb_sock_addr *)indata.dptr;
|
|
|
|
vnn = find_public_ip_vnn(ctdb, addr);
|
|
if (vnn == NULL) {
|
|
/* if it is not a public ip it could be our 'single ip' */
|
|
if (ctdb->single_ip_vnn) {
|
|
if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
|
|
vnn = ctdb->single_ip_vnn;
|
|
}
|
|
}
|
|
}
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
|
|
"'%s'not a public address\n",
|
|
ctdb_addr_to_str(addr)));
|
|
return -1;
|
|
}
|
|
|
|
/* count how many public ip structures we have */
|
|
num = 0;
|
|
for (;vnn->ifaces[num];) {
|
|
num++;
|
|
}
|
|
|
|
len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
|
|
num*sizeof(struct ctdb_iface);
|
|
info = talloc_zero_size(outdata, len);
|
|
CTDB_NO_MEMORY(ctdb, info);
|
|
|
|
info->ip.addr = vnn->public_address;
|
|
info->ip.pnn = vnn->pnn;
|
|
info->active_idx = 0xFFFFFFFF;
|
|
|
|
for (i=0; vnn->ifaces[i]; i++) {
|
|
struct ctdb_interface *cur;
|
|
|
|
cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
|
|
if (cur == NULL) {
|
|
DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
|
|
vnn->ifaces[i]));
|
|
return -1;
|
|
}
|
|
if (vnn->iface == cur) {
|
|
info->active_idx = i;
|
|
}
|
|
strncpy(info->ifaces[i].name, cur->name,
|
|
sizeof(info->ifaces[i].name));
|
|
info->ifaces[i].name[sizeof(info->ifaces[i].name)-1] = '\0';
|
|
info->ifaces[i].link_state = cur->link_up;
|
|
info->ifaces[i].references = cur->references;
|
|
}
|
|
info->num = i;
|
|
len = offsetof(struct ctdb_public_ip_info_old, ifaces) +
|
|
i*sizeof(struct ctdb_iface);
|
|
|
|
outdata->dsize = len;
|
|
outdata->dptr = (uint8_t *)info;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA *outdata)
|
|
{
|
|
int i, num, len;
|
|
struct ctdb_iface_list_old *ifaces;
|
|
struct ctdb_interface *cur;
|
|
|
|
/* count how many public ip structures we have */
|
|
num = 0;
|
|
for (cur=ctdb->ifaces;cur;cur=cur->next) {
|
|
num++;
|
|
}
|
|
|
|
len = offsetof(struct ctdb_iface_list_old, ifaces) +
|
|
num*sizeof(struct ctdb_iface);
|
|
ifaces = talloc_zero_size(outdata, len);
|
|
CTDB_NO_MEMORY(ctdb, ifaces);
|
|
|
|
i = 0;
|
|
for (cur=ctdb->ifaces;cur;cur=cur->next) {
|
|
strncpy(ifaces->ifaces[i].name, cur->name,
|
|
sizeof(ifaces->ifaces[i].name));
|
|
ifaces->ifaces[i].name[sizeof(ifaces->ifaces[i].name)-1] = '\0';
|
|
ifaces->ifaces[i].link_state = cur->link_up;
|
|
ifaces->ifaces[i].references = cur->references;
|
|
i++;
|
|
}
|
|
ifaces->num = i;
|
|
len = offsetof(struct ctdb_iface_list_old, ifaces) +
|
|
i*sizeof(struct ctdb_iface);
|
|
|
|
outdata->dsize = len;
|
|
outdata->dptr = (uint8_t *)ifaces;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA indata)
|
|
{
|
|
struct ctdb_iface *info;
|
|
struct ctdb_interface *iface;
|
|
bool link_up = false;
|
|
|
|
info = (struct ctdb_iface *)indata.dptr;
|
|
|
|
if (info->name[CTDB_IFACE_SIZE] != '\0') {
|
|
int len = strnlen(info->name, CTDB_IFACE_SIZE);
|
|
DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
|
|
len, len, info->name));
|
|
return -1;
|
|
}
|
|
|
|
switch (info->link_state) {
|
|
case 0:
|
|
link_up = false;
|
|
break;
|
|
case 1:
|
|
link_up = true;
|
|
break;
|
|
default:
|
|
DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
|
|
(unsigned int)info->link_state));
|
|
return -1;
|
|
}
|
|
|
|
if (info->references != 0) {
|
|
DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
|
|
(unsigned int)info->references));
|
|
return -1;
|
|
}
|
|
|
|
iface = ctdb_find_iface(ctdb, info->name);
|
|
if (iface == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (link_up == iface->link_up) {
|
|
return 0;
|
|
}
|
|
|
|
DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
|
|
("iface[%s] has changed it's link status %s => %s\n",
|
|
iface->name,
|
|
iface->link_up?"up":"down",
|
|
link_up?"up":"down"));
|
|
|
|
iface->link_up = link_up;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
called by a daemon to inform us of the entire list of TCP tickles for
|
|
a particular public address.
|
|
this control should only be sent by the node that is currently serving
|
|
that public address.
|
|
*/
|
|
int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
|
|
{
|
|
struct ctdb_tickle_list_old *list = (struct ctdb_tickle_list_old *)indata.dptr;
|
|
struct ctdb_tcp_array *tcparray;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
/* We must at least have tickles.num or else we cant verify the size
|
|
of the received data blob
|
|
*/
|
|
if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)) {
|
|
DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list. Not enough data for the tickle.num field\n"));
|
|
return -1;
|
|
}
|
|
|
|
/* verify that the size of data matches what we expect */
|
|
if (indata.dsize < offsetof(struct ctdb_tickle_list_old, connections)
|
|
+ sizeof(struct ctdb_connection) * list->num) {
|
|
DEBUG(DEBUG_ERR,("Bad indata in ctdb_tickle_list\n"));
|
|
return -1;
|
|
}
|
|
|
|
DEBUG(DEBUG_INFO, ("Received tickle update for public address %s\n",
|
|
ctdb_addr_to_str(&list->addr)));
|
|
|
|
vnn = find_public_ip_vnn(ctdb, &list->addr);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
|
|
ctdb_addr_to_str(&list->addr)));
|
|
|
|
return 1;
|
|
}
|
|
|
|
if (vnn->pnn == ctdb->pnn) {
|
|
DEBUG(DEBUG_INFO,
|
|
("Ignoring redundant set tcp tickle list, this node hosts '%s'\n",
|
|
ctdb_addr_to_str(&list->addr)));
|
|
return 0;
|
|
}
|
|
|
|
/* remove any old ticklelist we might have */
|
|
talloc_free(vnn->tcp_array);
|
|
vnn->tcp_array = NULL;
|
|
|
|
tcparray = talloc(vnn, struct ctdb_tcp_array);
|
|
CTDB_NO_MEMORY(ctdb, tcparray);
|
|
|
|
tcparray->num = list->num;
|
|
|
|
tcparray->connections = talloc_array(tcparray, struct ctdb_connection, tcparray->num);
|
|
CTDB_NO_MEMORY(ctdb, tcparray->connections);
|
|
|
|
memcpy(tcparray->connections, &list->connections[0],
|
|
sizeof(struct ctdb_connection)*tcparray->num);
|
|
|
|
/* We now have a new fresh tickle list array for this vnn */
|
|
vnn->tcp_array = tcparray;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
called to return the full list of tickles for the puclic address associated
|
|
with the provided vnn
|
|
*/
|
|
int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
|
|
{
|
|
ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
|
|
struct ctdb_tickle_list_old *list;
|
|
struct ctdb_tcp_array *tcparray;
|
|
int num;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
vnn = find_public_ip_vnn(ctdb, addr);
|
|
if (vnn == NULL) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
|
|
ctdb_addr_to_str(addr)));
|
|
|
|
return 1;
|
|
}
|
|
|
|
tcparray = vnn->tcp_array;
|
|
if (tcparray) {
|
|
num = tcparray->num;
|
|
} else {
|
|
num = 0;
|
|
}
|
|
|
|
outdata->dsize = offsetof(struct ctdb_tickle_list_old, connections)
|
|
+ sizeof(struct ctdb_connection) * num;
|
|
|
|
outdata->dptr = talloc_size(outdata, outdata->dsize);
|
|
CTDB_NO_MEMORY(ctdb, outdata->dptr);
|
|
list = (struct ctdb_tickle_list_old *)outdata->dptr;
|
|
|
|
list->addr = *addr;
|
|
list->num = num;
|
|
if (num) {
|
|
memcpy(&list->connections[0], tcparray->connections,
|
|
sizeof(struct ctdb_connection) * num);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
set the list of all tcp tickles for a public address
|
|
*/
|
|
static int ctdb_send_set_tcp_tickles_for_ip(struct ctdb_context *ctdb,
|
|
ctdb_sock_addr *addr,
|
|
struct ctdb_tcp_array *tcparray)
|
|
{
|
|
int ret, num;
|
|
TDB_DATA data;
|
|
struct ctdb_tickle_list_old *list;
|
|
|
|
if (tcparray) {
|
|
num = tcparray->num;
|
|
} else {
|
|
num = 0;
|
|
}
|
|
|
|
data.dsize = offsetof(struct ctdb_tickle_list_old, connections) +
|
|
sizeof(struct ctdb_connection) * num;
|
|
data.dptr = talloc_size(ctdb, data.dsize);
|
|
CTDB_NO_MEMORY(ctdb, data.dptr);
|
|
|
|
list = (struct ctdb_tickle_list_old *)data.dptr;
|
|
list->addr = *addr;
|
|
list->num = num;
|
|
if (tcparray) {
|
|
memcpy(&list->connections[0], tcparray->connections, sizeof(struct ctdb_connection) * num);
|
|
}
|
|
|
|
ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
|
|
CTDB_CONTROL_SET_TCP_TICKLE_LIST,
|
|
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
|
|
return -1;
|
|
}
|
|
|
|
talloc_free(data.dptr);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
perform tickle updates if required
|
|
*/
|
|
static void ctdb_update_tcp_tickles(struct tevent_context *ev,
|
|
struct tevent_timer *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
|
|
int ret;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
/* we only send out updates for public addresses that
|
|
we have taken over
|
|
*/
|
|
if (ctdb->pnn != vnn->pnn) {
|
|
continue;
|
|
}
|
|
/* We only send out the updates if we need to */
|
|
if (!vnn->tcp_update_needed) {
|
|
continue;
|
|
}
|
|
ret = ctdb_send_set_tcp_tickles_for_ip(ctdb,
|
|
&vnn->public_address,
|
|
vnn->tcp_array);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address)));
|
|
} else {
|
|
DEBUG(DEBUG_INFO,
|
|
("Sent tickle update for public address %s\n",
|
|
ctdb_addr_to_str(&vnn->public_address)));
|
|
vnn->tcp_update_needed = false;
|
|
}
|
|
}
|
|
|
|
tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
|
|
timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
|
|
ctdb_update_tcp_tickles, ctdb);
|
|
}
|
|
|
|
/*
|
|
start periodic update of tcp tickles
|
|
*/
|
|
void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
|
|
{
|
|
ctdb->tickle_update_context = talloc_new(ctdb);
|
|
|
|
tevent_add_timer(ctdb->ev, ctdb->tickle_update_context,
|
|
timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0),
|
|
ctdb_update_tcp_tickles, ctdb);
|
|
}
|
|
|
|
|
|
|
|
|
|
struct control_gratious_arp {
|
|
struct ctdb_context *ctdb;
|
|
ctdb_sock_addr addr;
|
|
const char *iface;
|
|
int count;
|
|
};
|
|
|
|
/*
|
|
send a control_gratuitous arp
|
|
*/
|
|
static void send_gratious_arp(struct tevent_context *ev,
|
|
struct tevent_timer *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
int ret;
|
|
struct control_gratious_arp *arp = talloc_get_type(private_data,
|
|
struct control_gratious_arp);
|
|
|
|
ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
|
|
arp->iface, strerror(errno)));
|
|
}
|
|
|
|
|
|
arp->count++;
|
|
if (arp->count == CTDB_ARP_REPEAT) {
|
|
talloc_free(arp);
|
|
return;
|
|
}
|
|
|
|
tevent_add_timer(arp->ctdb->ev, arp,
|
|
timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
|
|
send_gratious_arp, arp);
|
|
}
|
|
|
|
|
|
/*
|
|
send a gratious arp
|
|
*/
|
|
int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
|
|
{
|
|
struct ctdb_addr_info_old *gratious_arp = (struct ctdb_addr_info_old *)indata.dptr;
|
|
struct control_gratious_arp *arp;
|
|
|
|
/* verify the size of indata */
|
|
if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n",
|
|
(unsigned)indata.dsize,
|
|
(unsigned)offsetof(struct ctdb_addr_info_old, iface)));
|
|
return -1;
|
|
}
|
|
if (indata.dsize !=
|
|
( offsetof(struct ctdb_addr_info_old, iface)
|
|
+ gratious_arp->len ) ){
|
|
|
|
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
|
|
"but should be %u bytes\n",
|
|
(unsigned)indata.dsize,
|
|
(unsigned)(offsetof(struct ctdb_addr_info_old, iface)+gratious_arp->len)));
|
|
return -1;
|
|
}
|
|
|
|
|
|
arp = talloc(ctdb, struct control_gratious_arp);
|
|
CTDB_NO_MEMORY(ctdb, arp);
|
|
|
|
arp->ctdb = ctdb;
|
|
arp->addr = gratious_arp->addr;
|
|
arp->iface = talloc_strdup(arp, gratious_arp->iface);
|
|
CTDB_NO_MEMORY(ctdb, arp->iface);
|
|
arp->count = 0;
|
|
|
|
tevent_add_timer(arp->ctdb->ev, arp,
|
|
timeval_zero(), send_gratious_arp, arp);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
  Control handler: add a public address to this node.

  indata must hold a struct ctdb_addr_info_old whose total size is
  exactly the fixed header plus 'len' bytes of interface name.
  Returns 0 on success and -1 on malformed input or when
  ctdb_add_public_address() fails.
 */
int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
	size_t expected;
	int ret;

	/* The fixed part must be present before 'len' can be trusted */
	if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
		DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
		return -1;
	}

	expected = offsetof(struct ctdb_addr_info_old, iface) + pub->len;
	if (indata.dsize != expected) {
		DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
			"but should be %u bytes\n",
			 (unsigned)indata.dsize,
			 (unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
		return -1;
	}

	DEBUG(DEBUG_NOTICE,("Add IP %s\n", ctdb_addr_to_str(&pub->addr)));

	ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0], true);
	if (ret == 0) {
		return 0;
	}

	DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
	return -1;
}
|
|
|
|
/*
  State handed to delete_ip_callback() by
  ctdb_control_del_public_address().
 */
struct delete_ip_callback_state {
	/* the control request that delete_ip_callback() replies to */
	struct ctdb_req_control_old *c;
};
|
|
|
|
/*
|
|
called when releaseip event finishes for del_public_address
|
|
*/
|
|
static void delete_ip_callback(struct ctdb_context *ctdb,
|
|
int32_t status, TDB_DATA data,
|
|
const char *errormsg,
|
|
void *private_data)
|
|
{
|
|
struct delete_ip_callback_state *state =
|
|
talloc_get_type(private_data, struct delete_ip_callback_state);
|
|
|
|
/* If release failed then fail. */
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, status, errormsg);
|
|
talloc_free(private_data);
|
|
}
|
|
|
|
int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
TDB_DATA indata, bool *async_reply)
|
|
{
|
|
struct ctdb_addr_info_old *pub = (struct ctdb_addr_info_old *)indata.dptr;
|
|
struct ctdb_vnn *vnn;
|
|
|
|
/* verify the size of indata */
|
|
if (indata.dsize < offsetof(struct ctdb_addr_info_old, iface)) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_addr_info structure\n"));
|
|
return -1;
|
|
}
|
|
if (indata.dsize !=
|
|
( offsetof(struct ctdb_addr_info_old, iface)
|
|
+ pub->len ) ){
|
|
|
|
DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
|
|
"but should be %u bytes\n",
|
|
(unsigned)indata.dsize,
|
|
(unsigned)(offsetof(struct ctdb_addr_info_old, iface)+pub->len)));
|
|
return -1;
|
|
}
|
|
|
|
DEBUG(DEBUG_NOTICE,("Delete IP %s\n", ctdb_addr_to_str(&pub->addr)));
|
|
|
|
/* walk over all public addresses until we find a match */
|
|
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
|
if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
|
|
if (vnn->pnn == ctdb->pnn) {
|
|
struct delete_ip_callback_state *state;
|
|
struct ctdb_public_ip *ip;
|
|
TDB_DATA data;
|
|
int ret;
|
|
|
|
vnn->delete_pending = true;
|
|
|
|
state = talloc(ctdb,
|
|
struct delete_ip_callback_state);
|
|
CTDB_NO_MEMORY(ctdb, state);
|
|
state->c = c;
|
|
|
|
ip = talloc(state, struct ctdb_public_ip);
|
|
if (ip == NULL) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__ " Out of memory\n"));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
ip->pnn = -1;
|
|
ip->addr = pub->addr;
|
|
|
|
data.dsize = sizeof(struct ctdb_public_ip);
|
|
data.dptr = (unsigned char *)ip;
|
|
|
|
ret = ctdb_daemon_send_control(ctdb,
|
|
ctdb_get_pnn(ctdb),
|
|
0,
|
|
CTDB_CONTROL_RELEASE_IP,
|
|
0, 0,
|
|
data,
|
|
delete_ip_callback,
|
|
state);
|
|
if (ret == -1) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__ "Unable to send "
|
|
"CTDB_CONTROL_RELEASE_IP\n"));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
|
|
state->c = talloc_steal(state, c);
|
|
*async_reply = true;
|
|
} else {
|
|
/* This IP is not hosted on the
|
|
* current node so just delete it
|
|
* now. */
|
|
do_delete_ip(ctdb, vnn);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
DEBUG(DEBUG_ERR,("Delete IP of unknown public IP address %s\n",
|
|
ctdb_addr_to_str(&pub->addr)));
|
|
return -1;
|
|
}
|
|
|
|
|
|
/*
  State handed to ctdb_ipreallocated_callback() by
  ctdb_control_ipreallocated().
 */
struct ipreallocated_callback_state {
	/* the control request that the callback replies to */
	struct ctdb_req_control_old *c;
};
|
|
|
|
static void ctdb_ipreallocated_callback(struct ctdb_context *ctdb,
|
|
int status, void *p)
|
|
{
|
|
struct ipreallocated_callback_state *state =
|
|
talloc_get_type(p, struct ipreallocated_callback_state);
|
|
|
|
if (status != 0) {
|
|
DEBUG(DEBUG_ERR,
|
|
(" \"ipreallocated\" event script failed (status %d)\n",
|
|
status));
|
|
if (status == -ETIME) {
|
|
ctdb_ban_self(ctdb);
|
|
}
|
|
}
|
|
|
|
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
|
|
talloc_free(state);
|
|
}
|
|
|
|
/* A control to run the ipreallocated event */
|
|
int32_t ctdb_control_ipreallocated(struct ctdb_context *ctdb,
|
|
struct ctdb_req_control_old *c,
|
|
bool *async_reply)
|
|
{
|
|
int ret;
|
|
struct ipreallocated_callback_state *state;
|
|
|
|
state = talloc(ctdb, struct ipreallocated_callback_state);
|
|
CTDB_NO_MEMORY(ctdb, state);
|
|
|
|
DEBUG(DEBUG_INFO,(__location__ " Running \"ipreallocated\" event\n"));
|
|
|
|
ret = ctdb_event_script_callback(ctdb, state,
|
|
ctdb_ipreallocated_callback, state,
|
|
CTDB_EVENT_IPREALLOCATED,
|
|
"%s", "");
|
|
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,("Failed to run \"ipreallocated\" event \n"));
|
|
talloc_free(state);
|
|
return -1;
|
|
}
|
|
|
|
/* tell the control that we will be reply asynchronously */
|
|
state->c = talloc_steal(state, c);
|
|
*async_reply = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* This function is called from the recovery daemon to verify that a remote
|
|
node has the expected ip allocation.
|
|
This is verified against ctdb->ip_tree
|
|
*/
|
|
static int verify_remote_ip_allocation(struct ctdb_context *ctdb,
|
|
struct ctdb_public_ip_list *ips,
|
|
uint32_t pnn)
|
|
{
|
|
struct public_ip_list *tmp_ip;
|
|
int i;
|
|
|
|
if (ctdb->ip_tree == NULL) {
|
|
/* don't know the expected allocation yet, assume remote node
|
|
is correct. */
|
|
return 0;
|
|
}
|
|
|
|
if (ips == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
for (i=0; i<ips->num; i++) {
|
|
tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ip[i].addr));
|
|
if (tmp_ip == NULL) {
|
|
DEBUG(DEBUG_ERR,("Node %u has new or unknown public IP %s\n", pnn, ctdb_addr_to_str(&ips->ip[i].addr)));
|
|
return -1;
|
|
}
|
|
|
|
if (tmp_ip->pnn == -1 || ips->ip[i].pnn == -1) {
|
|
continue;
|
|
}
|
|
|
|
if (tmp_ip->pnn != ips->ip[i].pnn) {
|
|
DEBUG(DEBUG_ERR,
|
|
("Inconsistent IP allocation - node %u thinks %s is held by node %u while it is assigned to node %u\n",
|
|
pnn,
|
|
ctdb_addr_to_str(&ips->ip[i].addr),
|
|
ips->ip[i].pnn, tmp_ip->pnn));
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
|
|
{
|
|
struct public_ip_list *tmp_ip;
|
|
|
|
/* IP tree is never built if DisableIPFailover is set */
|
|
if (ctdb->tunable.disable_ip_failover != 0) {
|
|
return 0;
|
|
}
|
|
|
|
if (ctdb->ip_tree == NULL) {
|
|
DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
|
|
return -1;
|
|
}
|
|
|
|
tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
|
|
if (tmp_ip == NULL) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
|
|
return -1;
|
|
}
|
|
|
|
DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
|
|
tmp_ip->pnn = ip->pnn;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void clear_ip_assignment_tree(struct ctdb_context *ctdb)
|
|
{
|
|
TALLOC_FREE(ctdb->ip_tree);
|
|
}
|
|
|
|
/*
  State of one "reload public ips" operation.  Freeing the handle
  runs ctdb_reloadips_destructor().
 */
struct ctdb_reloadips_handle {
	/* daemon context this operation belongs to */
	struct ctdb_context *ctdb;
	/* pending control request; the destructor replies to it */
	struct ctdb_req_control_old *c;
	/* status sent with the reply; set by the child handler */
	int status;
	/* descriptor pair; the child handler reads the result from fd[0] */
	int fd[2];
	/* pid of the child process; killed by the destructor */
	pid_t child;
	/* presumably the event watching fd[0] - not used in the code
	   visible here */
	struct tevent_fd *fde;
};
|
|
|
|
static int ctdb_reloadips_destructor(struct ctdb_reloadips_handle *h)
|
|
{
|
|
if (h == h->ctdb->reload_ips) {
|
|
h->ctdb->reload_ips = NULL;
|
|
}
|
|
if (h->c != NULL) {
|
|
ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
|
|
h->c = NULL;
|
|
}
|
|
ctdb_kill(h->ctdb, h->child, SIGKILL);
|
|
return 0;
|
|
}
|
|
|
|
static void ctdb_reloadips_timeout_event(struct tevent_context *ev,
|
|
struct tevent_timer *te,
|
|
struct timeval t, void *private_data)
|
|
{
|
|
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
|
|
|
|
talloc_free(h);
|
|
}
|
|
|
|
static void ctdb_reloadips_child_handler(struct tevent_context *ev,
|
|
struct tevent_fd *fde,
|
|
uint16_t flags, void *private_data)
|
|
{
|
|
struct ctdb_reloadips_handle *h = talloc_get_type(private_data, struct ctdb_reloadips_handle);
|
|
|
|
char res;
|
|
int ret;
|
|
|
|
ret = sys_read(h->fd[0], &res, 1);
|
|
if (ret < 1 || res != 0) {
|
|
DEBUG(DEBUG_ERR, (__location__ " Reloadips child process returned error\n"));
|
|
res = 1;
|
|
}
|
|
h->status = res;
|
|
|
|
talloc_free(h);
|
|
}
|
|
|
|
static int ctdb_reloadips_child(struct ctdb_context *ctdb)
|
|
{
|
|
TALLOC_CTX *mem_ctx = talloc_new(NULL);
|
|
struct ctdb_public_ip_list_old *ips;
|
|
struct ctdb_vnn *vnn;
|
|
struct client_async_data *async_data;
|
|
struct timeval timeout;
|
|
TDB_DATA data;
|
|
struct ctdb_client_control_state *state;
|
|
bool first_add;
|
|
int i, ret;
|
|
|
|
CTDB_NO_MEMORY(ctdb, mem_ctx);
|
|
|
|
/* Read IPs from local node */
|
|
ret = ctdb_ctrl_get_public_ips(ctdb, TAKEOVER_TIMEOUT(),
|
|
CTDB_CURRENT_NODE, mem_ctx, &ips);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_ERR,
|
|
("Unable to fetch public IPs from local node\n"));
|
|
talloc_free(mem_ctx);
|
|
return -1;
|
|
}
|
|
|
|
/* Read IPs file - this is safe since this is a child process */
|
|
ctdb->vnn = NULL;
|
|
if (ctdb_set_public_addresses(ctdb, false) != 0) {
|
|
DEBUG(DEBUG_ERR,("Failed to re-read public addresses file\n"));
|
|
talloc_free(mem_ctx);
|
|
return -1;
|
|
}
|
|
|
|
async_data = talloc_zero(mem_ctx, struct client_async_data);
|
|
CTDB_NO_MEMORY(ctdb, async_data);
|
|
|
|
/* Compare IPs between node and file for IPs to be deleted */
|
|
for (i = 0; i < ips->num; i++) {
|
|
/* */
|
|
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
|
|
if (ctdb_same_ip(&vnn->public_address,
|
|
&ips->ips[i].addr)) {
|
|
/* IP is still in file */
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (vnn == NULL) {
|
|
/* Delete IP ips->ips[i] */
|
|
struct ctdb_addr_info_old *pub;
|
|
|
|
DEBUG(DEBUG_NOTICE,
|
|
("IP %s no longer configured, deleting it\n",
|
|
ctdb_addr_to_str(&ips->ips[i].addr)));
|
|
|
|
pub = talloc_zero(mem_ctx, struct ctdb_addr_info_old);
|
|
CTDB_NO_MEMORY(ctdb, pub);
|
|
|
|
pub->addr = ips->ips[i].addr;
|
|
pub->mask = 0;
|
|
pub->len = 0;
|
|
|
|
timeout = TAKEOVER_TIMEOUT();
|
|
|
|
data.dsize = offsetof(struct ctdb_addr_info_old,
|
|
iface) + pub->len;
|
|
data.dptr = (uint8_t *)pub;
|
|
|
|
state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
|
|
CTDB_CONTROL_DEL_PUBLIC_IP,
|
|
0, data, async_data,
|
|
&timeout, NULL);
|
|
if (state == NULL) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__
|
|
" failed sending CTDB_CONTROL_DEL_PUBLIC_IP\n"));
|
|
goto failed;
|
|
}
|
|
|
|
ctdb_client_async_add(async_data, state);
|
|
}
|
|
}
|
|
|
|
/* Compare IPs between node and file for IPs to be added */
|
|
first_add = true;
|
|
for (vnn = ctdb->vnn; vnn; vnn = vnn->next) {
|
|
for (i = 0; i < ips->num; i++) {
|
|
if (ctdb_same_ip(&vnn->public_address,
|
|
&ips->ips[i].addr)) {
|
|
/* IP already on node */
|
|
break;
|
|
}
|
|
}
|
|
if (i == ips->num) {
|
|
/* Add IP ips->ips[i] */
|
|
struct ctdb_addr_info_old *pub;
|
|
const char *ifaces = NULL;
|
|
uint32_t len;
|
|
int iface = 0;
|
|
|
|
DEBUG(DEBUG_NOTICE,
|
|
("New IP %s configured, adding it\n",
|
|
ctdb_addr_to_str(&vnn->public_address)));
|
|
if (first_add) {
|
|
uint32_t pnn = ctdb_get_pnn(ctdb);
|
|
|
|
data.dsize = sizeof(pnn);
|
|
data.dptr = (uint8_t *)&pnn;
|
|
|
|
ret = ctdb_client_send_message(
|
|
ctdb,
|
|
CTDB_BROADCAST_CONNECTED,
|
|
CTDB_SRVID_REBALANCE_NODE,
|
|
data);
|
|
if (ret != 0) {
|
|
DEBUG(DEBUG_WARNING,
|
|
("Failed to send message to force node reallocation - IPs may be unbalanced\n"));
|
|
}
|
|
|
|
first_add = false;
|
|
}
|
|
|
|
ifaces = vnn->ifaces[0];
|
|
iface = 1;
|
|
while (vnn->ifaces[iface] != NULL) {
|
|
ifaces = talloc_asprintf(vnn, "%s,%s", ifaces,
|
|
vnn->ifaces[iface]);
|
|
iface++;
|
|
}
|
|
|
|
len = strlen(ifaces) + 1;
|
|
pub = talloc_zero_size(mem_ctx,
|
|
offsetof(struct ctdb_addr_info_old, iface) + len);
|
|
CTDB_NO_MEMORY(ctdb, pub);
|
|
|
|
pub->addr = vnn->public_address;
|
|
pub->mask = vnn->public_netmask_bits;
|
|
pub->len = len;
|
|
memcpy(&pub->iface[0], ifaces, pub->len);
|
|
|
|
timeout = TAKEOVER_TIMEOUT();
|
|
|
|
data.dsize = offsetof(struct ctdb_addr_info_old,
|
|
iface) + pub->len;
|
|
data.dptr = (uint8_t *)pub;
|
|
|
|
state = ctdb_control_send(ctdb, CTDB_CURRENT_NODE, 0,
|
|
CTDB_CONTROL_ADD_PUBLIC_IP,
|
|
0, data, async_data,
|
|
&timeout, NULL);
|
|
if (state == NULL) {
|
|
DEBUG(DEBUG_ERR,
|
|
(__location__
|
|
" failed sending CTDB_CONTROL_ADD_PUBLIC_IP\n"));
|
|
goto failed;
|
|
}
|
|
|
|
ctdb_client_async_add(async_data, state);
|
|
}
|
|
}
|
|
|
|
if (ctdb_client_async_wait(ctdb, async_data) != 0) {
|
|
DEBUG(DEBUG_ERR,(__location__ " Add/delete IPs failed\n"));
|
|
goto failed;
|
|
}
|
|
|
|
talloc_free(mem_ctx);
|
|
return 0;
|
|
|
|
failed:
|
|
talloc_free(mem_ctx);
|
|
return -1;
|
|
}
|
|
|
|
/* This control is sent to force the node to re-read the public addresses file
|
|
   and drop any addresses we should no longer host, and add new addresses
|
|
that we are now able to host
|
|
*/
|
|
int32_t ctdb_control_reload_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control_old *c, bool *async_reply)
|
|
{
|
|
struct ctdb_reloadips_handle *h;
|
|
pid_t parent = getpid();
|
|
|
|
if (ctdb->reload_ips != NULL) {
|
|
talloc_free(ctdb->reload_ips);
|
|
ctdb->reload_ips = NULL;
|
|
}
|
|
|
|
h = talloc(ctdb, struct ctdb_reloadips_handle);
|
|
CTDB_NO_MEMORY(ctdb, h);
|
|
h->ctdb = ctdb;
|
|
h->c = NULL;
|
|
h->status = -1;
|
|
|
|
if (pipe(h->fd) == -1) {
|
|
DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
|
|
talloc_free(h);
|
|
return -1;
|
|
}
|
|
|
|
h->child = ctdb_fork(ctdb);
|
|
if (h->child == (pid_t)-1) {
|
|
DEBUG(DEBUG_ERR, ("Failed to fork a child for reloadips\n"));
|
|
close(h->fd[0]);
|
|
close(h->fd[1]);
|
|
talloc_free(h);
|
|
return -1;
|
|
}
|
|
|
|
/* child process */
|
|
if (h->child == 0) {
|
|
signed char res = 0;
|
|
|
|
close(h->fd[0]);
|
|
debug_extra = talloc_asprintf(NULL, "reloadips:");
|
|
|
|
prctl_set_comment("ctdb_reloadips");
|
|
if (switch_from_server_to_client(ctdb, "reloadips-child") != 0) {
|
|
DEBUG(DEBUG_CRIT,("ERROR: Failed to switch reloadips child into client mode\n"));
|
|
res = -1;
|
|
} else {
|
|
res = ctdb_reloadips_child(ctdb);
|
|
if (res != 0) {
|
|
DEBUG(DEBUG_ERR,("Failed to reload ips on local node\n"));
|
|
}
|
|
}
|
|
|
|
sys_write(h->fd[1], &res, 1);
|
|
ctdb_wait_for_process_to_exit(parent);
|
|
_exit(0);
|
|
}
|
|
|
|
h->c = talloc_steal(h, c);
|
|
|
|
close(h->fd[1]);
|
|
set_close_on_exec(h->fd[0]);
|
|
|
|
talloc_set_destructor(h, ctdb_reloadips_destructor);
|
|
|
|
|
|
h->fde = tevent_add_fd(ctdb->ev, h, h->fd[0], TEVENT_FD_READ,
|
|
ctdb_reloadips_child_handler, (void *)h);
|
|
tevent_fd_set_auto_close(h->fde);
|
|
|
|
tevent_add_timer(ctdb->ev, h, timeval_current_ofs(120, 0),
|
|
ctdb_reloadips_timeout_event, h);
|
|
|
|
/* we reply later */
|
|
*async_reply = true;
|
|
return 0;
|
|
}
|