1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-27 03:21:53 +03:00

Merge branch 'master' of 10.1.1.27:/shared/ctdb/ctdb-master

(This used to be ctdb commit 518945e59e2e48f07fcc0955f3aa81cd0d946aea)
This commit is contained in:
Ronnie Sahlberg 2011-07-29 09:04:01 +10:00
commit a17ae8a8be
5 changed files with 958 additions and 116 deletions

View File

@ -70,6 +70,7 @@ TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
tests/bin/ctdb_fetch_lock_once tests/bin/ctdb_store \
tests/bin/ctdb_randrec tests/bin/ctdb_persistent \
tests/bin/ctdb_traverse tests/bin/rb_test tests/bin/ctdb_transaction \
tests/bin/ctdb_takeover_tests
@INFINIBAND_BINS@
BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify bin/ping_pong bin/ltdbtool
@ -190,6 +191,12 @@ tests/bin/ctdb_transaction: $(CTDB_CLIENT_OBJ) tests/src/ctdb_transaction.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
CTDB_TAKEOVER_OBJ = $(CTDB_SERVER_OBJ:server/ctdbd.o=)
tests/bin/ctdb_takeover_tests: $(CTDB_TAKEOVER_OBJ) tests/src/ctdb_takeover_tests.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/src/ctdb_takeover_tests.o $(CTDB_TAKEOVER_OBJ) $(LIB_FLAGS)
tests/bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)

View File

@ -120,6 +120,7 @@ struct ctdb_tunable {
uint32_t stat_history_interval;
uint32_t deferred_attach_timeout;
uint32_t vacuum_fast_path_count;
uint32_t lcp2_public_ip_assignment;
};
/*
@ -1410,4 +1411,37 @@ int32_t ctdb_local_schedule_for_deletion(struct ctdb_db_context *ctdb_db,
struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h);
/* For unit testing ctdb_takeover.c. */
/* One public IP address in the singly linked list that the IP
 * allocation code works on.
 */
struct ctdb_public_ip_list {
/* Next entry in the list; NULL terminates the list. */
struct ctdb_public_ip_list *next;
/* Node the address is assigned to.  The allocation code compares
 * this against -1 to mean "not assigned to any node".
 */
uint32_t pnn;
/* The public address itself. */
ctdb_sock_addr addr;
};
/* Similarity measure between two addresses, derived from the leading
 * zero bits of their XOR.
 */
uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2);
/* Sum of the squared ip_distance() between ip and every address in
 * ips that is assigned to node pnn.  An entry whose addr field is the
 * very object ip points at is skipped.
 */
uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
struct ctdb_public_ip_list *ips,
int pnn);
/* LCP2 imbalance metric for the addresses currently assigned to node
 * pnn.
 */
uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn);
/* Allocate and fill the per-node arrays used by the LCP2 functions
 * below: *lcp2_imbalances and *newly_healthy, each with nodemap->num
 * entries, allocated under tmp_ctx.
 */
void lcp2_init(struct ctdb_context * tmp_ctx,
struct ctdb_node_map * nodemap,
uint32_t mask,
struct ctdb_public_ip_list *all_ips,
uint32_t **lcp2_imbalances,
bool **newly_healthy);
/* Assign addresses whose pnn is -1 to nodes, updating
 * lcp2_imbalances[] as addresses are placed.
 */
void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
uint32_t mask,
struct ctdb_public_ip_list *all_ips,
uint32_t *lcp2_imbalances);
/* Try to move one address from the most imbalanced node to a newly
 * healthy node.  Returns true if an address was moved.
 */
bool lcp2_failback(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
uint32_t mask,
struct ctdb_public_ip_list *all_ips,
uint32_t *lcp2_imbalances,
bool *newly_healthy);
/* The calculation part of the IP allocation algorithm.  The list of
 * addresses it worked on is stored in *all_ips_p.
 */
void ctdb_takeover_run_core(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
struct ctdb_public_ip_list **all_ips_p);
#endif

View File

@ -3,6 +3,7 @@
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
Copyright (C) Martin Schwenke 2011
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -1058,13 +1059,6 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
return 0;
}
struct ctdb_public_ip_list {
struct ctdb_public_ip_list *next;
uint32_t pnn;
ctdb_sock_addr addr;
};
/* Given a physical node, return the number of
public addresses that is currently assigned to this node.
*/
@ -1256,111 +1250,118 @@ create_merged_ip_list(struct ctdb_context *ctdb)
}
/*
make any IP alias changes for public addresses that are necessary
* This is the length of the longest common prefix between the IPs.
* It is calculated by XOR-ing the 2 IPs together and counting the
* number of leading zeroes. The implementation means that all
* addresses end up being 128 bits long.
* Not static, so we can easily link it into a unit test.
*
* FIXME? Should we consider IPv4 and IPv6 separately given that the
* 12 bytes of 0 prefix padding will hurt the algorithm if there are
* lots of nodes and IP addresses?
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
{
int i, num_healthy, retries, num_ips;
struct ctdb_public_ip ip;
struct ctdb_public_ipv4 ipv4;
uint32_t mask, *nodes;
struct ctdb_public_ip_list *all_ips, *tmp_ip;
int maxnode, maxnum=0, minnode, minnum=0, num;
TDB_DATA data;
struct timeval timeout;
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
uint32_t ip1_k[IP_KEYLEN];
uint32_t *t;
int i;
uint32_t x;
/*
* ip failover is completely disabled, just send out the
* ipreallocated event.
*/
if (ctdb->tunable.disable_ip_failover != 0) {
goto ipreallocated;
}
uint32_t distance = 0;
ZERO_STRUCT(ip);
/* Count how many completely healthy nodes we have */
num_healthy = 0;
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
num_healthy++;
}
}
if (num_healthy > 0) {
/* We have healthy nodes, so only consider them for
serving public addresses
*/
mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
t = ip_key(ip2);
for (i=0; i<IP_KEYLEN; i++) {
x = ip1_k[i] ^ t[i];
if (x == 0) {
distance += 32;
} else {
/* We didnt have any completely healthy nodes so
use "disabled" nodes as a fallback
/* Count number of leading zeroes.
* FIXME? This could be optimised...
*/
mask = NODE_FLAGS_INACTIVE;
while ((x & (1 << 31)) == 0) {
x <<= 1;
distance += 1;
}
/* since nodes only know about those public addresses that
can be served by that particular node, no single node has
a full list of all public addresses that exist in the cluster.
Walk over all node structures and create a merged list of
all public addresses that exist in the cluster.
keep the tree of ips around as ctdb->ip_tree
*/
all_ips = create_merged_ip_list(ctdb);
/* Count how many ips we have */
num_ips = 0;
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
num_ips++;
}
/* If we want deterministic ip allocations, i.e. that the ip addresses
will always be allocated the same way for a specific set of
available/unavailable nodes.
*/
if (1 == ctdb->tunable.deterministic_public_ips) {
DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
tmp_ip->pnn = i%nodemap->num;
}
}
return distance;
}
/* mark all public addresses with a masked node as being served by
node -1
/* Calculate the IP distance for the given IP relative to IPs on the
given node. The ips argument is generally the all_ips variable
used in the main part of the algorithm.
* Not static, so we can easily link it into a unit test.
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
struct ctdb_public_ip_list *ips,
int pnn)
{
struct ctdb_public_ip_list *t;
uint32_t d;
uint32_t sum = 0;
for (t=ips; t != NULL; t=t->next) {
if (t->pnn != pnn) {
continue;
}
if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
tmp_ip->pnn = -1;
}
}
/* verify that the assigned nodes can serve that public ip
and set it to -1 if not
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
/* Optimisation: We never calculate the distance
* between an address and itself. This allows us to
* calculate the effect of removing an address from a
* node by simply calculating the distance between
* that address and all of the existing addresses.
* Moreover, we assume that we're only ever dealing
* with addresses from all_ips so we can identify an
* address via a pointer rather than doing a more
* expensive address comparison. */
if (&(t->addr) == ip) {
continue;
}
if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
/* this node can not serve this ip. */
tmp_ip->pnn = -1;
}
d = ip_distance(ip, &(t->addr));
sum += d * d; /* Cheaper than pulling in math.h :-) */
}
return sum;
}
/* now we must redistribute all public addresses with takeover node
-1 among the nodes available
/* Return the LCP2 imbalance metric for addresses currently assigned
to the given node.
* Not static, so we can easily link it into a unit test.
*/
retries = 0;
try_again:
uint32_t lcp2_imbalance(struct ctdb_public_ip_list * all_ips, int pnn)
{
	uint32_t total = 0;
	struct ctdb_public_ip_list *cur = all_ips;

	/* An address on the node is only compared against the entries
	 * that follow it in the list, so the remainder of the list is
	 * handed to ip_distance_2_sum() rather than all of all_ips.
	 */
	while (cur != NULL) {
		if (cur->pnn == pnn) {
			total += ip_distance_2_sum(&(cur->addr), cur->next, pnn);
		}
		cur = cur->next;
	}

	return total;
}
/* Allocate any unassigned IPs just by looping through the IPs and
* finding the best node for each.
* Not static, so we can easily link it into a unit test.
*/
void basic_allocate_unassigned(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
uint32_t mask,
struct ctdb_public_ip_list *all_ips)
{
struct ctdb_public_ip_list *tmp_ip;
/* loop over all ip's and find a physical node to cover for
each unassigned ip.
*/
@ -1372,26 +1373,26 @@ try_again:
}
}
}
}
/* If we dont want ips to fail back after a node becomes healthy
again, we wont even try to reallocat the ip addresses so that
they are evenly spread out.
This can NOT be used at the same time as DeterministicIPs !
/* Basic non-deterministic rebalancing algorithm.
* Not static, so we can easily link it into a unit test.
*/
if (1 == ctdb->tunable.no_ip_failback) {
if (1 == ctdb->tunable.deterministic_public_ips) {
DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
}
goto finished;
}
bool basic_failback(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
uint32_t mask,
struct ctdb_public_ip_list *all_ips,
int num_ips,
int *retries)
{
int i;
int maxnode, maxnum=0, minnode, minnum=0, num;
struct ctdb_public_ip_list *tmp_ip;
/* now, try to make sure the ip adresses are evenly distributed
across the node.
for each ip address, loop over all nodes that can serve this
ip and make sure that the difference between the node
serving the most and the node serving the least ip's are not greater
than 1.
/* for each ip address, loop over all nodes that can serve
this ip and make sure that the difference between the node
serving the most and the node serving the least ip's are
not greater than 1.
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
@ -1455,7 +1456,7 @@ try_again:
want to spend too much time balancing the ip coverage.
*/
if ( (maxnum > minnum+1)
&& (retries < (num_ips + 5)) ){
&& (*retries < (num_ips + 5)) ){
struct ctdb_public_ip_list *tmp;
/* mark one of maxnode's vnn's as unassigned and try
@ -1464,13 +1465,402 @@ try_again:
for (tmp=all_ips;tmp;tmp=tmp->next) {
if (tmp->pnn == maxnode) {
tmp->pnn = -1;
retries++;
goto try_again;
(*retries)++;
return true;
}
}
}
}
return false;
}
/* Set up the per-node state needed by the LCP2 functions.  Kept in a
 * function of its own so that it can be unit tested.
 *
 * On return (*lcp2_imbalances)[n] holds lcp2_imbalance() for node n,
 * and (*newly_healthy)[n] is true only if node n's flags do not
 * intersect mask and no address in all_ips is assigned to it.  Both
 * arrays have nodemap->num entries and are allocated under tmp_ctx.
 *
 * Not static, so we can easily link it into a unit test.
 */
void lcp2_init(struct ctdb_context * tmp_ctx,
	       struct ctdb_node_map * nodemap,
	       uint32_t mask,
	       struct ctdb_public_ip_list *all_ips,
	       uint32_t **lcp2_imbalances,
	       bool **newly_healthy)
{
	int n;
	struct ctdb_public_ip_list *ip;
	uint32_t *imbalances;
	bool *healthy;

	*newly_healthy = talloc_array(tmp_ctx, bool, nodemap->num);
	CTDB_NO_MEMORY_FATAL(tmp_ctx, *newly_healthy);

	*lcp2_imbalances = talloc_array(tmp_ctx, uint32_t, nodemap->num);
	CTDB_NO_MEMORY_FATAL(tmp_ctx, *lcp2_imbalances);

	healthy = *newly_healthy;
	imbalances = *lcp2_imbalances;

	for (n = 0; n < nodemap->num; n++) {
		imbalances[n] = lcp2_imbalance(all_ips, n);

		/* Step 1: a node is a candidate if none of the masked
		 * flags are set on it.
		 */
		if (nodemap->nodes[n].flags & mask) {
			healthy[n] = false;
		} else {
			healthy[n] = true;
		}
	}

	/* Step 2: a node that already has addresses assigned must have
	 * been healthy before, so it is not "newly" healthy.
	 */
	for (ip = all_ips; ip != NULL; ip = ip->next) {
		if (ip->pnn == -1) {
			continue;
		}
		healthy[ip->pnn] = false;
	}
}
/* Place unassigned addresses (pnn == -1) using the LCP2 algorithm.
 *
 * Each pass scans every unassigned address against every node that
 * can serve it and whose flags do not intersect mask, picks the
 * address/node pair with the smallest ip_distance_2_sum(), assigns
 * it and updates lcp2_imbalances[] for that node.  Passes repeat
 * until nothing is left unassigned or a pass finds no candidate.
 * Any address still unassigned at the end is logged as a warning.
 *
 * Not static, so we can easily link it into a unit test.
 */
void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
			      struct ctdb_node_map *nodemap,
			      uint32_t mask,
			      struct ctdb_public_ip_list *all_ips,
			      uint32_t *lcp2_imbalances)
{
	struct ctdb_public_ip_list *ip;
	struct ctdb_public_ip_list *best_ip;
	int node;
	int best_node;
	uint32_t cost, new_imbl;
	uint32_t best_cost;
	uint32_t best_imbl = 0;
	bool have_unassigned;

	do {
		DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
		DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));

		best_node = -1;
		best_cost = 0;
		best_ip = NULL;

		/* Look at every unassigned address... */
		for (ip = all_ips; ip != NULL; ip = ip->next) {
			if (ip->pnn != -1) {
				continue;
			}

			/* ...against every node that is able and allowed
			 * to host it.
			 */
			for (node = 0; node < nodemap->num; node++) {
				if (can_node_serve_ip(ctdb, node, ip) ||
				    (nodemap->nodes[node].flags & mask)) {
					continue;
				}

				cost = ip_distance_2_sum(&(ip->addr), all_ips, node);
				new_imbl = lcp2_imbalances[node] + cost;
				DEBUG(DEBUG_DEBUG,(" %s -> %d [+%d]\n",
						   ctdb_addr_to_str(&(ip->addr)),
						   node,
						   new_imbl - lcp2_imbalances[node]));

				/* Keep the first candidate, or any later
				 * one that is strictly cheaper.
				 */
				if ((best_node != -1) && (cost >= best_cost)) {
					continue;
				}
				best_node = node;
				best_imbl = new_imbl;
				best_cost = cost;
				best_ip = ip;
			}
		}

		DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));

		/* Commit the cheapest placement found in this pass. */
		if (best_node != -1) {
			best_ip->pnn = best_node;
			lcp2_imbalances[best_node] = best_imbl;
			DEBUG(DEBUG_INFO,(" %s -> %d [+%d]\n",
					  ctdb_addr_to_str(&(best_ip->addr)),
					  best_node,
					  best_cost));
		}

		/* Is there anything left to place? */
		have_unassigned = false;
		for (ip = all_ips; ip != NULL; ip = ip->next) {
			if (ip->pnn == -1) {
				have_unassigned = true;
				break;
			}
		}
	} while (have_unassigned && (best_node != -1));

	/* Nothing left over means nothing to report. */
	if (!have_unassigned) {
		return;
	}

	for (ip = all_ips; ip != NULL; ip = ip->next) {
		if (ip->pnn == -1) {
			DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
					     ctdb_addr_to_str(&ip->addr)));
		}
	}
}
/* LCP2 algorithm for rebalancing the cluster.  This finds the source
 * node with the highest LCP2 imbalance, and then determines the best
 * IP/destination node combination to move from the source node.
 *
 * Only nodes flagged in newly_healthy[] are considered as
 * destinations.  At most one address is moved per call: its pnn is
 * changed and lcp2_imbalances[] is updated for the source and the
 * destination node.
 *
 * Returns true if an address was moved, false if there is no newly
 * healthy node, no imbalance, or no move that improves matters.
 * ctdb is only handed on to can_node_serve_ip(); mask is not used.
 *
 * Not static, so we can easily link it into a unit test.
 */
bool lcp2_failback(struct ctdb_context *ctdb,
		   struct ctdb_node_map *nodemap,
		   uint32_t mask,
		   struct ctdb_public_ip_list *all_ips,
		   uint32_t *lcp2_imbalances,
		   bool *newly_healthy)
{
	int srcnode, dstnode, mindstnode, i, num_newly_healthy;
	uint32_t srcimbl, srcdsum, maximbl, dstimbl, dstdsum;
	uint32_t minsrcimbl, mindstimbl, b;
	struct ctdb_public_ip_list *minip;
	struct ctdb_public_ip_list *tmp_ip;

	/* It is only worth continuing if we have suitable target
	 * nodes to transfer IPs to.  This check is much cheaper than
	 * continuing on...
	 */
	num_newly_healthy = 0;
	for (i = 0; i < nodemap->num; i++) {
		if (newly_healthy[i]) {
			num_newly_healthy++;
		}
	}
	if (num_newly_healthy == 0) {
		return false;
	}

	/* Get the node with the highest imbalance metric. */
	srcnode = -1;
	maximbl = 0;
	for (i = 0; i < nodemap->num; i++) {
		b = lcp2_imbalances[i];
		if ((srcnode == -1) || (b > maximbl)) {
			srcnode = i;
			maximbl = b;
		}
	}

	/* This means that all nodes had 0 or 1 addresses, so can't be
	 * imbalanced.
	 */
	if (maximbl == 0) {
		return false;
	}

	/* Find an IP and destination node that best reduces imbalance. */
	minip = NULL;
	minsrcimbl = 0;
	mindstnode = -1;
	mindstimbl = 0;

	DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
	DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES FROM %d [%d]\n", srcnode, maximbl));

	for (tmp_ip = all_ips; tmp_ip; tmp_ip = tmp_ip->next) {
		/* Only consider addresses on srcnode. */
		if (tmp_ip->pnn != srcnode) {
			continue;
		}

		/* What is this IP address costing the source node? */
		srcdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, srcnode);
		srcimbl = maximbl - srcdsum;

		/* Consider what this IP address would cost each
		 * potential destination node.  Destination nodes are
		 * limited to those that are newly healthy, since we
		 * don't want to do gratuitous failover of IPs just to
		 * make minor balance improvements.
		 */
		for (dstnode = 0; dstnode < nodemap->num; dstnode++) {
			if (! newly_healthy[dstnode]) {
				continue;
			}
			/* only check nodes that can actually serve this ip */
			if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
				/* no it couldnt so skip to the next node */
				continue;
			}

			dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
			dstimbl = lcp2_imbalances[dstnode] + dstdsum;
			DEBUG(DEBUG_DEBUG,(" %d [%d] -> %s -> %d [+%d]\n",
					   srcnode, srcimbl - lcp2_imbalances[srcnode],
					   ctdb_addr_to_str(&(tmp_ip->addr)),
					   dstnode, dstimbl - lcp2_imbalances[dstnode]));

			/* Accept a move only if the destination ends up
			 * better off than the source was and the address
			 * is cheaper there; among those, keep the one with
			 * the lowest combined imbalance.
			 */
			if ((dstimbl < maximbl) && (dstdsum < srcdsum) &&
			    ((mindstnode == -1) ||
			     ((srcimbl + dstimbl) < (minsrcimbl + mindstimbl)))) {
				minip = tmp_ip;
				minsrcimbl = srcimbl;
				mindstnode = dstnode;
				mindstimbl = dstimbl;
			}
		}
	}
	DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));

	if (mindstnode != -1) {
		/* We found a move that makes things better... */
		DEBUG(DEBUG_INFO,("%d [%d] -> %s -> %d [+%d]\n",
				  srcnode, minsrcimbl - lcp2_imbalances[srcnode],
				  ctdb_addr_to_str(&(minip->addr)),
				  mindstnode, mindstimbl - lcp2_imbalances[mindstnode]));

		/* Store the imbalance that belongs to the chosen address
		 * (minsrcimbl).  srcimbl holds the value for whichever
		 * address on srcnode the scan looked at last, which need
		 * not be the one being moved.
		 */
		lcp2_imbalances[srcnode] = minsrcimbl;
		lcp2_imbalances[mindstnode] = mindstimbl;
		minip->pnn = mindstnode;

		return true;
	}

	return false;
}
/* The calculation part of the IP allocation algorithm.
* Not static, so we can easily link it into a unit test.
*/
void ctdb_takeover_run_core(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap,
struct ctdb_public_ip_list **all_ips_p)
{
int i, num_healthy, retries, num_ips;
uint32_t mask;
struct ctdb_public_ip_list *all_ips, *tmp_ip;
uint32_t *lcp2_imbalances;
bool *newly_healthy;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
/* Count how many completely healthy nodes we have */
num_healthy = 0;
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
num_healthy++;
}
}
if (num_healthy > 0) {
/* We have healthy nodes, so only consider them for
serving public addresses
*/
mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
} else {
/* We didnt have any completely healthy nodes so
use "disabled" nodes as a fallback
*/
mask = NODE_FLAGS_INACTIVE;
}
/* since nodes only know about those public addresses that
can be served by that particular node, no single node has
a full list of all public addresses that exist in the cluster.
Walk over all node structures and create a merged list of
all public addresses that exist in the cluster.
keep the tree of ips around as ctdb->ip_tree
*/
all_ips = create_merged_ip_list(ctdb);
*all_ips_p = all_ips; /* minimal code changes */
/* Count how many ips we have */
num_ips = 0;
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
num_ips++;
}
/* If we want deterministic ip allocations, i.e. that the ip addresses
will always be allocated the same way for a specific set of
available/unavailable nodes.
*/
if (1 == ctdb->tunable.deterministic_public_ips) {
DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
tmp_ip->pnn = i%nodemap->num;
}
}
/* mark all public addresses with a masked node as being served by
node -1
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
continue;
}
if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
tmp_ip->pnn = -1;
}
}
/* verify that the assigned nodes can serve that public ip
and set it to -1 if not
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
continue;
}
if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
/* this node can not serve this ip. */
tmp_ip->pnn = -1;
}
}
if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
lcp2_init(tmp_ctx, nodemap, mask, all_ips, &lcp2_imbalances, &newly_healthy);
}
/* now we must redistribute all public addresses with takeover node
-1 among the nodes available
*/
retries = 0;
try_again:
if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
lcp2_allocate_unassigned(ctdb, nodemap, mask, all_ips, lcp2_imbalances);
} else {
basic_allocate_unassigned(ctdb, nodemap, mask, all_ips);
}
/* If we dont want ips to fail back after a node becomes healthy
again, we wont even try to reallocat the ip addresses so that
they are evenly spread out.
This can NOT be used at the same time as DeterministicIPs !
*/
if (1 == ctdb->tunable.no_ip_failback) {
if (1 == ctdb->tunable.deterministic_public_ips) {
DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
}
goto finished;
}
/* now, try to make sure the ip adresses are evenly distributed
across the node.
*/
if (1 == ctdb->tunable.lcp2_public_ip_assignment) {
if (lcp2_failback(ctdb, nodemap, mask, all_ips, lcp2_imbalances, newly_healthy)) {
goto try_again;
}
} else {
if (basic_failback(ctdb, nodemap, mask, all_ips, num_ips, &retries)) {
goto try_again;
}
}
/* finished distributing the public addresses, now just send the
info out to the nodes
@ -1481,6 +1871,38 @@ finished:
or -1 if there is no node that can cover this ip
*/
return;
}
/*
make any IP alias changes for public addresses that are necessary
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
int i;
struct ctdb_public_ip ip;
struct ctdb_public_ipv4 ipv4;
uint32_t *nodes;
struct ctdb_public_ip_list *all_ips, *tmp_ip;
TDB_DATA data;
struct timeval timeout;
struct client_async_data *async_data;
struct ctdb_client_control_state *state;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
/*
* ip failover is completely disabled, just send out the
* ipreallocated event.
*/
if (ctdb->tunable.disable_ip_failover != 0) {
goto ipreallocated;
}
ZERO_STRUCT(ip);
/* Do the IP reassignment calculations */
ctdb_takeover_run_core(ctdb, nodemap, &all_ips);
/* now tell all nodes to delete any alias that they should not
have. This will be a NOOP on nodes that don't currently
hold the given alias */

View File

@ -46,6 +46,7 @@ static const struct {
{ "RerecoveryTimeout", 10, offsetof(struct ctdb_tunable, rerecovery_timeout) },
{ "EnableBans", 1, offsetof(struct ctdb_tunable, enable_bans) },
{ "DeterministicIPs", 1, offsetof(struct ctdb_tunable, deterministic_public_ips) },
{ "LCP2PublicIPs", 0, offsetof(struct ctdb_tunable, lcp2_public_ip_assignment) },
{ "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) },
{ "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) },
{ "DisableIPFailover", 0, offsetof(struct ctdb_tunable, disable_ip_failover) },

View File

@ -0,0 +1,378 @@
/*
Tests for ctdb_takeover.c
Copyright (C) Martin Schwenke 2011
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../include/ctdb_private.h"
/*
 * Stand-ins for symbols that are defined in ctdbd.c, which cannot be
 * linked into this test program.
 */
int script_log_level;
bool fast_start;
/* Empty stub: does nothing in the test program. */
void ctdb_load_nodes_file(struct ctdb_context *ctdb) {}
/* Read public IP assignments from stdin, one per line.
 *
 * Format of each line is "IP pnn" - the separator has to be at least
 * 1 space (not a tab or whatever - a space!).  A line that contains
 * no space is taken to be a bare IP address and gets PNN -1.  A line
 * whose address does not parse is logged and skipped.
 *
 * Returns the head of a list allocated under ctx, in input order, or
 * NULL if no valid address was read.  Exits with status 1 if a list
 * entry cannot be allocated.
 */
static struct ctdb_public_ip_list *
read_ctdb_public_ip_list(TALLOC_CTX *ctx)
{
	char line[1024];
	ctdb_sock_addr addr;
	char *t;
	int pnn;
	struct ctdb_public_ip_list *entry;
	struct ctdb_public_ip_list *last = NULL;
	struct ctdb_public_ip_list *ret = NULL;

	while (fgets(line, sizeof(line), stdin) != NULL) {
		if ((t = strchr(line, ' ')) != NULL) {
			/* Make line contain just the address */
			*t = '\0';
			/* Point to PNN or leading whitespace... */
			t++;
			pnn = (int) strtol(t, (char **) NULL, 10);
		} else {
			/* Assume just an IP address, default to PNN -1 */
			if ((t = strchr(line, '\n')) != NULL) {
				*t = '\0';
			}
			pnn = -1;
		}

		if (!parse_ip(line, NULL, 0, &addr)) {
			DEBUG(DEBUG_ERR, (__location__ " ERROR, bad address :%s\n", line));
			continue;
		}

		entry = talloc(ctx, struct ctdb_public_ip_list);
		if (entry == NULL) {
			/* Do not dereference a failed allocation. */
			fprintf(stderr, "OUT OF MEMORY\n");
			exit(1);
		}
		entry->next = NULL;
		entry->pnn = pnn;
		memcpy(&(entry->addr), &addr, sizeof(addr));

		/* Append, remembering the head on the first entry. */
		if (last == NULL) {
			ret = entry;
		} else {
			last->next = entry;
		}
		last = entry;
	}

	return ret;
}
/* Print each entry of the list to stdout as "<address> <pnn>", one
 * per line, in list order.
 */
void print_ctdb_public_ip_list(struct ctdb_public_ip_list * ips)
{
	struct ctdb_public_ip_list *cur;

	for (cur = ips; cur != NULL; cur = cur->next) {
		printf("%s %d\n", ctdb_addr_to_str(&(cur->addr)), cur->pnn);
	}
}
/* Round-trip test: parse the IP list given on stdin (1 entry per
 * line) and print it straight back out.
 */
void ctdb_test_read_ctdb_public_ip_list(void)
{
	TALLOC_CTX *mem = talloc_new(NULL);

	print_ctdb_public_ip_list(read_ctdb_public_ip_list(mem));

	talloc_free(mem);
}
/* Read IPs from stdin and print ip_distance() between the first two.
 * Prints nothing if fewer than two addresses were read.
 */
void ctdb_test_ip_distance(void)
{
	TALLOC_CTX *mem = talloc_new(NULL);
	struct ctdb_public_ip_list *first = read_ctdb_public_ip_list(mem);

	if (first != NULL && first->next != NULL) {
		uint32_t d = ip_distance(&(first->addr), &(first->next->addr));

		printf ("%lu\n", (unsigned long) d);
	}

	talloc_free(mem);
}
/* Read some IPs from stdin and print ip_distance_2_sum() for the
 * address given as ip against those entries that are on node pnn.
 * The given IP must be one of the ones in the list; otherwise, or if
 * the input is empty or ip does not parse, an error is written to
 * stderr and the program exits with status 1.
 */
void ctdb_test_ip_distance_2_sum(const char ip[], int pnn)
{
	struct ctdb_public_ip_list *list;
	struct ctdb_public_ip_list *match;
	ctdb_sock_addr wanted;
	uint32_t sum;
	TALLOC_CTX *mem = talloc_new(NULL);

	list = read_ctdb_public_ip_list(mem);

	if (list == NULL || !parse_ip(ip, NULL, 0, &wanted)) {
		fprintf(stderr, "BAD INPUT");
		exit(1);
	}

	/* Locate the list entry holding the requested address: the
	 * pointer into the list is what gets passed on below.
	 */
	match = list;
	while (match != NULL && !ctdb_same_ip(&(match->addr), &wanted)) {
		match = match->next;
	}
	if (match == NULL) {
		fprintf(stderr, "IP NOT PRESENT IN LIST");
		exit(1);
	}

	sum = ip_distance_2_sum(&(match->addr), list, pnn);
	printf ("%lu\n", (unsigned long) sum);

	talloc_free(mem);
}
/* Read some IPs from stdin and print the value of lcp2_imbalance()
 * for node pnn over that list.
 */
void ctdb_test_lcp2_imbalance(int pnn)
{
	TALLOC_CTX *mem = talloc_new(NULL);
	struct ctdb_public_ip_list *list = read_ctdb_public_ip_list(mem);
	uint32_t result = lcp2_imbalance(list, pnn);

	printf ("%lu\n", (unsigned long) result);

	talloc_free(mem);
}
void ctdb_test_init(const char nodestates[],
struct ctdb_context **ctdb,
struct ctdb_public_ip_list **all_ips,
struct ctdb_node_map **nodemap)
{
struct ctdb_public_ip_list *t;
struct ctdb_all_public_ips *available_public_ips;
int i, numips, numnodes;
numnodes = strlen(nodestates);
*ctdb = talloc_zero(NULL, struct ctdb_context);
/* Fake things up... */
(*ctdb)->num_nodes = numnodes;
(*ctdb)->tunable.deterministic_public_ips = 0;
(*ctdb)->tunable.disable_ip_failover = 0;
(*ctdb)->tunable.no_ip_failback = 0;
if (getenv("CTDB_LCP2")) {
if (strcmp(getenv("CTDB_LCP2"), "yes") == 0) {
(*ctdb)->tunable.lcp2_public_ip_assignment = 1;
} else {
(*ctdb)->tunable.lcp2_public_ip_assignment = 0;
}
}
*nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes);
(*nodemap)->num = numnodes;
for (i=0; i < numnodes; i++) {
(*nodemap)->nodes[i].pnn = i;
(*nodemap)->nodes[i].flags = nodestates[i] - '0';
/* *nodemap->nodes[i].sockaddr is uninitialised */
}
*all_ips = read_ctdb_public_ip_list(*ctdb);
numips = 0;
for (t = *all_ips; t != NULL; t = t->next) {
numips++;
}
available_public_ips = talloc_array(*ctdb, struct ctdb_all_public_ips, numips); // FIXME: bogus size, overkill
available_public_ips->num = numips;
for (t = *all_ips, i=0; t != NULL && i < numips ; t = t->next, i++) {
available_public_ips->ips[i].pnn = t->pnn;
memcpy(&(available_public_ips->ips[i].addr), &(t->addr), sizeof(t->addr));
}
(*ctdb)->nodes = talloc_array(*ctdb, struct ctdb_node *, numnodes); // FIXME: bogus size, overkill
/* Setup both nodemap and ctdb->nodes. Mark all nodes as
* healthy - change this later. */
for (i=0; i < numnodes; i++) {
uint32_t flags = nodestates[i] - '0' ? NODE_FLAGS_UNHEALTHY : 0;
(*nodemap)->nodes[i].pnn = i;
(*nodemap)->nodes[i].flags = flags;
/* nodemap->nodes[i].sockaddr is uninitialised */
(*ctdb)->nodes[i] = talloc(*ctdb, struct ctdb_node);
(*ctdb)->nodes[i]->pnn = i;
(*ctdb)->nodes[i]->flags = flags;
(*ctdb)->nodes[i]->available_public_ips = available_public_ips;
(*ctdb)->nodes[i]->known_public_ips = available_public_ips;
}
}
/* Run lcp2_init() followed by lcp2_allocate_unassigned() for the
 * given node states and print the resulting layout.
 * IP layout is read from stdin.
 */
void ctdb_test_lcp2_allocate_unassigned(const char nodestates[])
{
	const uint32_t mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
	struct ctdb_context *ctdb;
	struct ctdb_node_map *nodemap;
	struct ctdb_public_ip_list *all_ips;
	bool *newly_healthy;
	uint32_t *lcp2_imbalances;

	ctdb_test_init(nodestates, &ctdb, &all_ips, &nodemap);

	lcp2_init(ctdb, nodemap, mask,
		  all_ips, &lcp2_imbalances, &newly_healthy);
	lcp2_allocate_unassigned(ctdb, nodemap, mask,
				 all_ips, lcp2_imbalances);

	print_ctdb_public_ip_list(all_ips);

	talloc_free(ctdb);
}
/* Run lcp2_init() followed by a single lcp2_failback() call for the
 * given node states and print the resulting layout.
 * IP layout is read from stdin.
 */
void ctdb_test_lcp2_failback(const char nodestates[])
{
	const uint32_t mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
	struct ctdb_context *ctdb;
	struct ctdb_node_map *nodemap;
	struct ctdb_public_ip_list *all_ips;
	bool *newly_healthy;
	uint32_t *lcp2_imbalances;

	ctdb_test_init(nodestates, &ctdb, &all_ips, &nodemap);

	lcp2_init(ctdb, nodemap, mask,
		  all_ips, &lcp2_imbalances, &newly_healthy);
	lcp2_failback(ctdb, nodemap, mask,
		      all_ips, lcp2_imbalances, newly_healthy);

	print_ctdb_public_ip_list(all_ips);

	talloc_free(ctdb);
}
/* Run lcp2_init() and then call lcp2_failback() repeatedly until it
 * reports that it made no move, then print the resulting layout.
 * IP layout is read from stdin.
 */
void ctdb_test_lcp2_failback_loop(const char nodestates[])
{
	const uint32_t mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
	struct ctdb_context *ctdb;
	struct ctdb_node_map *nodemap;
	struct ctdb_public_ip_list *all_ips;
	bool *newly_healthy;
	uint32_t *lcp2_imbalances;

	ctdb_test_init(nodestates, &ctdb, &all_ips, &nodemap);

	lcp2_init(ctdb, nodemap, mask,
		  all_ips, &lcp2_imbalances, &newly_healthy);

	while (lcp2_failback(ctdb, nodemap, mask,
			     all_ips, lcp2_imbalances, newly_healthy)) {
		/* keep going while moves are still being made */
	}

	print_ctdb_public_ip_list(all_ips);

	talloc_free(ctdb);
}
/* Run the whole allocation calculation, ctdb_takeover_run_core(),
 * for the given node states and print the list it hands back.
 * IP layout is read from stdin.
 */
void ctdb_test_ctdb_takeover_run_core(const char nodestates[])
{
	struct ctdb_node_map *nodemap;
	struct ctdb_public_ip_list *ips;
	struct ctdb_context *ctdb;

	ctdb_test_init(nodestates, &ctdb, &ips, &nodemap);

	/* ips is passed by address, so the call may replace it. */
	ctdb_takeover_run_core(ctdb, nodemap, &ips);

	print_ctdb_public_ip_list(ips);

	talloc_free(ctdb);
}
/* Write the usage line to stderr and exit with status 1. */
void usage(void)
{
	fputs("usage: ctdb_takeover_tests <op>\n", stderr);
	exit(1);
}
int main(int argc, const char *argv[])
{
LogLevel = DEBUG_DEBUG;
if (getenv("CTDB_TEST_LOGLEVEL")) {
LogLevel = atoi(getenv("CTDB_TEST_LOGLEVEL"));
}
if (argc < 2) {
usage();
}
if (strcmp(argv[1], "ip_list") == 0) {
ctdb_test_read_ctdb_public_ip_list();
} else if (strcmp(argv[1], "ip_distance") == 0) {
ctdb_test_ip_distance();
} else if (argc == 4 && strcmp(argv[1], "ip_distance_2_sum") == 0) {
ctdb_test_ip_distance_2_sum(argv[2], atoi(argv[3]));
} else if (argc >= 3 && strcmp(argv[1], "lcp2_imbalance") == 0) {
ctdb_test_lcp2_imbalance(atoi(argv[2]));
} else if (argc == 3 && strcmp(argv[1], "lcp2_allocate_unassigned") == 0) {
ctdb_test_lcp2_allocate_unassigned(argv[2]);
} else if (argc == 3 && strcmp(argv[1], "lcp2_failback") == 0) {
ctdb_test_lcp2_failback(argv[2]);
} else if (argc == 3 && strcmp(argv[1], "lcp2_failback_loop") == 0) {
ctdb_test_lcp2_failback_loop(argv[2]);
} else if (argc == 3 && strcmp(argv[1], "ctdb_takeover_run_core") == 0) {
ctdb_test_ctdb_takeover_run_core(argv[2]);
} else {
usage();
}
return 0;
}