mirror of
https://github.com/samba-team/samba.git
synced 2025-01-24 02:04:21 +03:00
allow different nodes in the cluster to use different public_addresses
files so that we can partition the cluster into different subsets of nodes which each serve a different subset of the public addresses (This used to be ctdb commit 889e0fe69e4c88c6166282b12843b8d9727552d6)
This commit is contained in:
parent
8f819c6a0e
commit
77ec4d5248
@ -186,6 +186,12 @@ struct ctdb_node {
|
||||
/* a list of controls pending to this node, so we can time them out quickly
|
||||
if the node becomes disconnected */
|
||||
struct daemon_control_state *pending_controls;
|
||||
|
||||
/* used by the recovery daemon when distributing ip addresses
|
||||
across the nodes. it needs to know which public ip's can be handled
|
||||
by each node.
|
||||
*/
|
||||
struct ctdb_all_public_ips *public_ips;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1543,6 +1543,30 @@ again:
|
||||
}
|
||||
|
||||
|
||||
/* update the list of public ips that a node can handle for
|
||||
all connected nodes
|
||||
*/
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
/* release any existing data */
|
||||
if (ctdb->nodes[j]->public_ips) {
|
||||
talloc_free(ctdb->nodes[j]->public_ips);
|
||||
ctdb->nodes[j]->public_ips = NULL;
|
||||
}
|
||||
/* grab a new shiny list of public ips from the node */
|
||||
if (ctdb_ctrl_get_public_ips(ctdb, CONTROL_TIMEOUT(),
|
||||
ctdb->nodes[j]->pnn,
|
||||
ctdb->nodes,
|
||||
&ctdb->nodes[j]->public_ips)) {
|
||||
DEBUG(0,("Failed to read public ips from node : %u\n",
|
||||
ctdb->nodes[j]->pnn));
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* verify that all active nodes agree that we are the recmaster */
|
||||
switch (verify_recmaster(ctdb, nodemap, pnn)) {
|
||||
case MONITOR_RECOVERY_NEEDED:
|
||||
|
@ -473,30 +473,67 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct ctdb_public_ip_list {
|
||||
struct ctdb_public_ip_list *next;
|
||||
uint32_t pnn;
|
||||
struct sockaddr_in sin;
|
||||
};
|
||||
|
||||
|
||||
/* Given a physical node, return the number of
|
||||
public addresses that is currently assigned to this node.
|
||||
*/
|
||||
static int node_ip_coverage(struct ctdb_context *ctdb,
|
||||
int32_t pnn)
|
||||
int32_t pnn,
|
||||
struct ctdb_public_ip_list *ips)
|
||||
{
|
||||
int num=0;
|
||||
struct ctdb_vnn *vnn;
|
||||
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == pnn) {
|
||||
for (;ips;ips=ips->next) {
|
||||
if (ips->pnn == pnn) {
|
||||
num++;
|
||||
}
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
/* search the vnn list for a node to takeover vnn.
|
||||
pick the node that currently are serving the least number of vnns
|
||||
so that the vnns get spread out evenly.
|
||||
|
||||
/* Check if this is a public ip known to the node, i.e. can that
|
||||
node takeover this ip ?
|
||||
*/
|
||||
static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
|
||||
struct ctdb_public_ip_list *ip)
|
||||
{
|
||||
struct ctdb_all_public_ips *public_ips;
|
||||
int i;
|
||||
|
||||
public_ips = ctdb->nodes[pnn]->public_ips;
|
||||
|
||||
if (public_ips == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i=0;i<public_ips->num;i++) {
|
||||
if (ip->sin.sin_addr.s_addr == public_ips->ips[i].sin.sin_addr.s_addr) {
|
||||
/* yes, this node can serve this public ip */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* search the node lists list for a node to takeover this ip.
|
||||
pick the node that currently are serving the least number of ips
|
||||
so that the ips get spread out evenly.
|
||||
*/
|
||||
static int find_takeover_node(struct ctdb_context *ctdb,
|
||||
struct ctdb_node_map *nodemap, uint32_t mask,
|
||||
struct ctdb_vnn *vnn)
|
||||
struct ctdb_public_ip_list *ip,
|
||||
struct ctdb_public_ip_list *all_ips)
|
||||
{
|
||||
int pnn, min, num;
|
||||
int i;
|
||||
@ -510,7 +547,13 @@ static int find_takeover_node(struct ctdb_context *ctdb,
|
||||
continue;
|
||||
}
|
||||
|
||||
num = node_ip_coverage(ctdb, i);
|
||||
/* verify that this node can serve this ip */
|
||||
if (can_node_serve_ip(ctdb, i, ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
|
||||
num = node_ip_coverage(ctdb, i, all_ips);
|
||||
/* was this the first node we checked ? */
|
||||
if (pnn == -1) {
|
||||
pnn = i;
|
||||
@ -523,25 +566,78 @@ static int find_takeover_node(struct ctdb_context *ctdb,
|
||||
}
|
||||
}
|
||||
if (pnn == -1) {
|
||||
DEBUG(0,(__location__ " Could not find node to take over public address '%s'\n", vnn->public_address));
|
||||
DEBUG(0,(__location__ " Could not find node to take over public address '%s'\n", inet_ntoa(ip->sin.sin_addr)));
|
||||
return -1;
|
||||
}
|
||||
|
||||
vnn->pnn = pnn;
|
||||
ip->pnn = pnn;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ctdb_public_ip_list *
|
||||
add_ip_to_merged_list(struct ctdb_context *ctdb,
|
||||
TALLOC_CTX *tmp_ctx,
|
||||
struct ctdb_public_ip_list *ip_list,
|
||||
struct ctdb_public_ip *ip)
|
||||
{
|
||||
struct ctdb_public_ip_list *tmp_ip;
|
||||
|
||||
/* do we already have this ip in our merged list ?*/
|
||||
for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
|
||||
/* we already have this public ip in the list */
|
||||
if (tmp_ip->sin.sin_addr.s_addr == ip->sin.sin_addr.s_addr) {
|
||||
return ip_list;
|
||||
}
|
||||
}
|
||||
|
||||
/* this is a new public ip, we must add it to the list */
|
||||
tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
|
||||
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
|
||||
tmp_ip->pnn = ip->pnn;
|
||||
tmp_ip->sin = ip->sin;
|
||||
tmp_ip->next = ip_list;
|
||||
|
||||
return tmp_ip;
|
||||
}
|
||||
|
||||
struct ctdb_public_ip_list *
|
||||
create_merged_ip_list(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx)
|
||||
{
|
||||
int i, j;
|
||||
struct ctdb_public_ip_list *ip_list = NULL;
|
||||
struct ctdb_all_public_ips *public_ips;
|
||||
|
||||
for (i=0;i<ctdb->num_nodes;i++) {
|
||||
public_ips = ctdb->nodes[i]->public_ips;
|
||||
|
||||
/* there were no public ips for this node */
|
||||
if (public_ips == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (j=0;j<public_ips->num;j++) {
|
||||
ip_list = add_ip_to_merged_list(ctdb, tmp_ctx,
|
||||
ip_list, &public_ips->ips[j]);
|
||||
}
|
||||
}
|
||||
|
||||
return ip_list;
|
||||
}
|
||||
|
||||
/*
|
||||
make any IP alias changes for public addresses that are necessary
|
||||
*/
|
||||
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
{
|
||||
int i, num_healthy;
|
||||
int i, num_healthy, retries;
|
||||
int ret;
|
||||
struct ctdb_public_ip ip;
|
||||
uint32_t mask;
|
||||
struct ctdb_vnn *vnn;
|
||||
struct ctdb_public_ip_list *all_ips, *tmp_ip;
|
||||
int maxnode, maxnum, minnode, minnum, num;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
|
||||
|
||||
ZERO_STRUCT(ip);
|
||||
|
||||
@ -565,16 +661,24 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
mask = NODE_FLAGS_INACTIVE;
|
||||
}
|
||||
|
||||
/* since nodes only know about those public addresses that
|
||||
can be served by that particular node, no single node has
|
||||
a full list of all public addresses that exist in the cluster.
|
||||
Walk over all node structures and create a merged list of
|
||||
all public addresses that exist in the cluster.
|
||||
*/
|
||||
all_ips = create_merged_ip_list(ctdb, tmp_ctx);
|
||||
|
||||
|
||||
/* mark all public addresses with a masked node as being served by
|
||||
node -1
|
||||
*/
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == -1) {
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == -1) {
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[vnn->pnn].flags & mask) {
|
||||
vnn->pnn = -1;
|
||||
if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
|
||||
tmp_ip->pnn = -1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -582,72 +686,101 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
/* now we must redistribute all public addresses with takeover node
|
||||
-1 among the nodes available
|
||||
*/
|
||||
retries = 0;
|
||||
try_again:
|
||||
/* loop over all vnn's and find a physical node to cover for
|
||||
each unassigned vnn.
|
||||
/* loop over all ip's and find a physical node to cover for
|
||||
each unassigned ip.
|
||||
*/
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == -1) {
|
||||
if (find_takeover_node(ctdb, nodemap, mask, vnn)) {
|
||||
DEBUG(0,("Failed to find node to cover ip %s\n", vnn->public_address));
|
||||
return -1;
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == -1) {
|
||||
if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
|
||||
DEBUG(0,("Failed to find node to cover ip %s\n", inet_ntoa(tmp_ip->sin.sin_addr)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the highest and lowes number of vnn's a valid node
|
||||
covers for this interface
|
||||
|
||||
/* now, try to make sure the ip adresses are evenly distributed
|
||||
across the node.
|
||||
for each ip address, loop over all nodes that can serve this
|
||||
ip and make sure that the difference between the node
|
||||
serving the most and the node serving the least ip's are not greater
|
||||
than 1.
|
||||
*/
|
||||
maxnode = -1;
|
||||
minnode = -1;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & mask) {
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == -1) {
|
||||
continue;
|
||||
}
|
||||
num = node_ip_coverage(ctdb, i);
|
||||
if (maxnode == -1) {
|
||||
maxnode = i;
|
||||
maxnum = num;
|
||||
} else {
|
||||
if (num > maxnum) {
|
||||
|
||||
/* Get the highest and lowest number of ips's served by any
|
||||
valid node which can serve this ip.
|
||||
*/
|
||||
maxnode = -1;
|
||||
minnode = -1;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & mask) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* only check nodes that can actually serve this ip */
|
||||
if (can_node_serve_ip(ctdb, i, tmp_ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
|
||||
num = node_ip_coverage(ctdb, i, all_ips);
|
||||
if (maxnode == -1) {
|
||||
maxnode = i;
|
||||
maxnum = num;
|
||||
} else {
|
||||
if (num > maxnum) {
|
||||
maxnode = i;
|
||||
maxnum = num;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (minnode == -1) {
|
||||
minnode = i;
|
||||
minnum = num;
|
||||
} else {
|
||||
if (num < minnum) {
|
||||
if (minnode == -1) {
|
||||
minnode = i;
|
||||
minnum = num;
|
||||
} else {
|
||||
if (num < minnum) {
|
||||
minnode = i;
|
||||
minnum = num;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxnode == -1) {
|
||||
DEBUG(0,(__location__ " Could not find maxnode\n"));
|
||||
return -1;
|
||||
}
|
||||
if (maxnode == -1) {
|
||||
DEBUG(0,(__location__ " Could not find maxnode. May not be able to server ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr)));
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if the spread between the smallest and largest coverage by
|
||||
a node is >=2 we steal one of the ips from the node with the
|
||||
most coverage to even things out a bit
|
||||
*/
|
||||
if (maxnum > minnum+1) {
|
||||
/* mark one of maxnode's vnn's as unassigned and try
|
||||
again
|
||||
/* if the spread between the smallest and largest coverage by
|
||||
a node is >=2 we steal one of the ips from the node with
|
||||
most coverage to even things out a bit.
|
||||
try to do this at most 5 times since we dont want to spend
|
||||
too much time balancing the ip coverage.
|
||||
*/
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == maxnode) {
|
||||
vnn->pnn = -1;
|
||||
goto try_again;
|
||||
if ( (maxnum > minnum+1)
|
||||
&& (retries < 5) ){
|
||||
struct ctdb_public_ip_list *tmp;
|
||||
|
||||
/* mark one of maxnode's vnn's as unassigned and try
|
||||
again
|
||||
*/
|
||||
for (tmp=all_ips;tmp;tmp=tmp->next) {
|
||||
if (tmp->pnn == maxnode) {
|
||||
tmp->pnn = -1;
|
||||
retries++;
|
||||
goto try_again;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* at this point ->pnn is the node which will own each IP */
|
||||
/* at this point ->pnn is the node which will own each IP
|
||||
or -1 if there is no node that can cover this ip
|
||||
*/
|
||||
|
||||
/* now tell all nodes to delete any alias that they should not
|
||||
have. This will be a NOOP on nodes that don't currently
|
||||
@ -658,16 +791,16 @@ try_again:
|
||||
continue;
|
||||
}
|
||||
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == nodemap->nodes[i].pnn) {
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
|
||||
/* This node should be serving this
|
||||
vnn so dont tell it to release the ip
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
ip.pnn = vnn->pnn;
|
||||
ip.pnn = tmp_ip->pnn;
|
||||
ip.sin.sin_family = AF_INET;
|
||||
inet_aton(vnn->public_address, &ip.sin.sin_addr);
|
||||
ip.sin.sin_addr = tmp_ip->sin.sin_addr;
|
||||
|
||||
ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
|
||||
nodemap->nodes[i].pnn,
|
||||
@ -675,33 +808,37 @@ try_again:
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
|
||||
nodemap->nodes[i].pnn,
|
||||
vnn->public_address));
|
||||
inet_ntoa(tmp_ip->sin.sin_addr)));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* tell all nodes to get their own IPs */
|
||||
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
|
||||
if (vnn->pnn == -1) {
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == -1) {
|
||||
/* this IP won't be taken over */
|
||||
continue;
|
||||
}
|
||||
ip.pnn = vnn->pnn;
|
||||
ip.pnn = tmp_ip->pnn;
|
||||
ip.sin.sin_family = AF_INET;
|
||||
inet_aton(vnn->public_address, &ip.sin.sin_addr);
|
||||
ip.sin.sin_addr = tmp_ip->sin.sin_addr;
|
||||
|
||||
ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(),
|
||||
vnn->pnn,
|
||||
tmp_ip->pnn,
|
||||
&ip);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
|
||||
vnn->pnn,
|
||||
vnn->public_address));
|
||||
tmp_ip->pnn,
|
||||
inet_ntoa(tmp_ip->sin.sin_addr)));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user