mirror of
https://github.com/samba-team/samba.git
synced 2025-01-11 05:18:09 +03:00
In the recovery daemon, keep track of which node each public IP address has been assigned to,
and verify that the remote nodes have/keep a consistent view of the assigned addresses. If a remote node has an inconsistent view of the addresses vis-à-vis the recovery master, this will trigger a full IP reallocation. (This used to be ctdb commit f3bf2ab61f8dbbc806ec23a68a87aaedd458e712)
This commit is contained in:
parent
3f226d0c8e
commit
06885ea9a7
@ -21,17 +21,17 @@
|
||||
|
||||
#define TRBT_RED 0x00
|
||||
#define TRBT_BLACK 0x01
|
||||
typedef struct _trbt_node_t {
|
||||
struct _trbt_tree_t *tree;
|
||||
struct _trbt_node_t *parent;
|
||||
struct _trbt_node_t *left;
|
||||
struct _trbt_node_t *right;
|
||||
typedef struct trbt_node {
|
||||
struct trbt_tree *tree;
|
||||
struct trbt_node *parent;
|
||||
struct trbt_node *left;
|
||||
struct trbt_node *right;
|
||||
uint32_t rb_color;
|
||||
uint32_t key32;
|
||||
void *data;
|
||||
} trbt_node_t;
|
||||
|
||||
typedef struct _trbt_tree_t {
|
||||
typedef struct trbt_tree {
|
||||
trbt_node_t *root;
|
||||
/* automatically free the tree when the last node has been deleted */
|
||||
#define TRBT_AUTOFREE 0x00000001
|
||||
|
@ -442,7 +442,7 @@ struct ctdb_context {
|
||||
struct ctdb_call_state *pending_calls;
|
||||
struct ctdb_client_ip *client_ip_list;
|
||||
bool do_checkpublicip;
|
||||
struct _trbt_tree_t *server_ids;
|
||||
struct trbt_tree *server_ids;
|
||||
const char *event_script_dir;
|
||||
const char *notification_script;
|
||||
const char *default_public_interface;
|
||||
@ -469,6 +469,9 @@ struct ctdb_context {
|
||||
|
||||
/* mapping from pid to ctdb_client * */
|
||||
struct ctdb_client_pid_list *client_pids;
|
||||
|
||||
/* used in the recovery daemon to remember the ip allocation */
|
||||
struct trbt_tree *ip_tree;
|
||||
};
|
||||
|
||||
struct ctdb_db_context {
|
||||
@ -1633,4 +1636,8 @@ int ctdb_recheck_persistent_health(struct ctdb_context *ctdb);
|
||||
void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event);
|
||||
|
||||
void ctdb_fault_setup(void);
|
||||
|
||||
int verify_remote_ip_allocation(struct ctdb_context *ctdb,
|
||||
struct ctdb_all_public_ips *ips);
|
||||
|
||||
#endif
|
||||
|
@ -1228,6 +1228,7 @@ static void reload_nodes_file(struct ctdb_context *ctdb)
|
||||
}
|
||||
|
||||
static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
|
||||
struct ctdb_recoverd *rec,
|
||||
struct ctdb_node_map *nodemap,
|
||||
uint32_t *culprit)
|
||||
{
|
||||
@ -1274,6 +1275,11 @@ static int ctdb_reload_remote_public_ips(struct ctdb_context *ctdb,
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (verify_remote_ip_allocation(ctdb, ctdb->nodes[j]->known_public_ips)) {
|
||||
DEBUG(DEBUG_ERR,("Node %d has inconsistent public ip allocation and needs update.\n", ctdb->nodes[j]->pnn));
|
||||
rec->need_takeover_run = true;
|
||||
}
|
||||
|
||||
/* grab a new shiny list of public ips from the node */
|
||||
ret = ctdb_ctrl_get_public_ips_flags(ctdb,
|
||||
CONTROL_TIMEOUT(),
|
||||
@ -1568,7 +1574,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
/*
|
||||
tell nodes to takeover their public IPs
|
||||
*/
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, nodemap, &culprit);
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
|
||||
culprit));
|
||||
@ -1961,7 +1967,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
|
||||
/* update the list of public ips that a node can handle for
|
||||
all connected nodes
|
||||
*/
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, rec->nodemap, &culprit);
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, rec, rec->nodemap, &culprit);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
|
||||
culprit));
|
||||
@ -2414,9 +2420,9 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
|
||||
}
|
||||
|
||||
|
||||
/* called to check that the allocation of public ip addresses is ok.
|
||||
/* called to check that the local allocation of public ip addresses is ok.
|
||||
*/
|
||||
static int verify_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
|
||||
static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, uint32_t pnn)
|
||||
{
|
||||
TALLOC_CTX *mem_ctx = talloc_new(NULL);
|
||||
struct ctdb_control_get_ifaces *ifaces = NULL;
|
||||
@ -3088,7 +3094,7 @@ again:
|
||||
*/
|
||||
if (ctdb->do_checkpublicip) {
|
||||
if (rec->ip_check_disable_ctx == NULL) {
|
||||
if (verify_ip_allocation(ctdb, rec, pnn) != 0) {
|
||||
if (verify_local_ip_allocation(ctdb, rec, pnn) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Public IPs were inconsistent.\n"));
|
||||
}
|
||||
}
|
||||
@ -3353,7 +3359,7 @@ again:
|
||||
/* update the list of public ips that a node can handle for
|
||||
all connected nodes
|
||||
*/
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, nodemap, &culprit);
|
||||
ret = ctdb_reload_remote_public_ips(ctdb, rec, nodemap, &culprit);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
|
||||
culprit));
|
||||
|
@ -1159,14 +1159,17 @@ void getips_count_callback(void *param, void *data)
|
||||
}
|
||||
|
||||
struct ctdb_public_ip_list *
|
||||
create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
|
||||
create_merged_ip_list(struct ctdb_context *ctdb)
|
||||
{
|
||||
int i, j;
|
||||
struct ctdb_public_ip_list *ip_list;
|
||||
struct ctdb_all_public_ips *public_ips;
|
||||
trbt_tree_t *ip_tree;
|
||||
|
||||
ip_tree = trbt_create(tmp_ctx, 0);
|
||||
if (ctdb->ip_tree != NULL) {
|
||||
talloc_free(ctdb->ip_tree);
|
||||
ctdb->ip_tree = NULL;
|
||||
}
|
||||
ctdb->ip_tree = trbt_create(ctdb, 0);
|
||||
|
||||
for (i=0;i<ctdb->num_nodes;i++) {
|
||||
public_ips = ctdb->nodes[i]->known_public_ips;
|
||||
@ -1183,13 +1186,13 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
|
||||
for (j=0;j<public_ips->num;j++) {
|
||||
struct ctdb_public_ip_list *tmp_ip;
|
||||
|
||||
tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
|
||||
tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
|
||||
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
|
||||
tmp_ip->pnn = public_ips->ips[j].pnn;
|
||||
tmp_ip->addr = public_ips->ips[j].addr;
|
||||
tmp_ip->next = NULL;
|
||||
|
||||
trbt_insertarray32_callback(ip_tree,
|
||||
trbt_insertarray32_callback(ctdb->ip_tree,
|
||||
IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
|
||||
add_ip_callback,
|
||||
tmp_ip);
|
||||
@ -1197,7 +1200,7 @@ create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
|
||||
}
|
||||
|
||||
ip_list = NULL;
|
||||
trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
|
||||
trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
|
||||
|
||||
return ip_list;
|
||||
}
|
||||
@ -1247,8 +1250,10 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
a full list of all public addresses that exist in the cluster.
|
||||
Walk over all node structures and create a merged list of
|
||||
all public addresses that exist in the cluster.
|
||||
|
||||
keep the tree of ips around as ctdb->ip_tree
|
||||
*/
|
||||
all_ips = create_merged_ip_list(ctdb, tmp_ctx);
|
||||
all_ips = create_merged_ip_list(ctdb);
|
||||
|
||||
/* If we want deterministic ip allocations, i.e. that the ip addresses
|
||||
will always be allocated the same way for a specific set of
|
||||
@ -2806,3 +2811,41 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* This function is called from the recovery daemon to verify that a remote
|
||||
node has the expected ip allocation.
|
||||
This is verified against ctdb->ip_tree.
Returns 0 when ctdb->ip_tree is NULL, when ips is NULL, or when no
mismatch is found. Returns -1 at the first address that is missing from
ctdb->ip_tree or whose pnn differs from the pnn recorded in the tree.
|
||||
*/
|
||||
int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
|
||||
{
|
||||
struct ctdb_public_ip_list *tmp_ip;
|
||||
int i;
|
||||
|
||||
if (ctdb->ip_tree == NULL) {
|
||||
/* don't know the expected allocation yet, assume remote node
|
||||
is correct. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* no address list supplied for this node: nothing to compare */
if (ips == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i=0; i<ips->num; i++) {
|
||||
/* look up this address in ctdb->ip_tree; a NULL result is treated as an unknown address */
tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
|
||||
if (tmp_ip == NULL) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* skip the comparison when either pnn compares equal to -1
   (presumably "not assigned to any node" -- TODO confirm) */
if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* the remote node and the tree disagree about which node hosts this address */
if (tmp_ip->pnn != ips->ips[i].pnn) {
|
||||
DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation.\n"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user