1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-08 21:18:16 +03:00

recoverd: Fix tunable NoIPTakeoverOnDisabled, rename to NoIPHostOnAllDisabled

This really needs to be per-node.  The rename is because nodes with
this tunable switched on should drop IPs if they become unhealthy (or
disabled in some other way).

* Add new flag NODE_FLAGS_NOIPHOST, only used in recovery daemon.

* Enhance set_ipflags_internal() and set_ipflags() to setup
  NODE_FLAGS_NOIPHOST depending on setting of NoIPHostOnAllDisabled
  and/or whether nodes are disabled/inactive.

* Replace can_node_servce_ip() with functions can_node_host_ip() and
  can_node_takeover_ip().  These functions are the only ones that need
  to look at NODE_FLAGS_NOIPTAKEOVER and NODE_FLAGS_NOIPHOST.  They
  can make the decision without looking at any other flags due to
  previous setup.

* Remove explicit flag checking in IP allocation functions (including
  unassign_unsuitable_ips()) and just call can_node_host_ip() and
  can_node_takeover_ip() as appropriate.

* Update test code to handle CTDB_SET_NoIPHostOnAllDisabled.

Signed-off-by: Martin Schwenke <martin@meltin.net>
Pair-programmed-with: Amitay Isaacs <amitay@gmail.com>

(This used to be ctdb commit 1308a51f73f2e29ba4dbebb6111d9309a89732cc)
This commit is contained in:
Martin Schwenke 2013-05-03 16:59:20 +10:00
parent ac80824709
commit 0445c988e2
9 changed files with 100 additions and 89 deletions

View File

@ -862,15 +862,16 @@
</para> </para>
</refsect2> </refsect2>
<refsect2><title>NoIPTakeoverOnDisabled</title> <refsect2><title>NoIPHostOnAllDisabled</title>
<para>Default: 0</para> <para>Default: 0</para>
<para> <para>
If no nodes are healthy then by default ctdb will happily host If no nodes are healthy then by default ctdb will happily host
public IPs on disabled (unhealthy or administratively disabled) public IPs on disabled (unhealthy or administratively disabled)
nodes. This can cause problems, for example if the underlying nodes. This can cause problems, for example if the underlying
cluster filesystem is not mounted. When set to 1 this behaviour cluster filesystem is not mounted. When set to 1 on a node and
is switched off and disabled nodes will not be able to takeover that node is disabled it, any IPs hosted by this node will be
IPs. released and the node will not takeover any IPs until it is no
longer disabled.
</para> </para>
</refsect2> </refsect2>

View File

@ -135,7 +135,7 @@ struct ctdb_tunable {
uint32_t db_record_size_warn; uint32_t db_record_size_warn;
uint32_t db_size_warn; uint32_t db_size_warn;
uint32_t pulldb_preallocation_size; uint32_t pulldb_preallocation_size;
uint32_t no_ip_takeover_on_disabled; uint32_t no_ip_host_on_all_disabled;
uint32_t deadlock_timeout; uint32_t deadlock_timeout;
uint32_t samba3_hack; uint32_t samba3_hack;
}; };

View File

@ -584,7 +584,9 @@ struct ctdb_node_map {
#define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED) #define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED) #define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
#define NODE_FLAGS_NOIPTAKEOVER 0x01000000 /* this node can takeover any new ip addresses, this flag is ONLY valid within the recovery daemon */ /* These flags are ONLY valid within the recovery daemon */
#define NODE_FLAGS_NOIPTAKEOVER 0x01000000 /* can not takeover additional IPs */
#define NODE_FLAGS_NOIPHOST 0x02000000 /* can not host IPs */
/* /*

View File

@ -1299,31 +1299,46 @@ static int node_ip_coverage(struct ctdb_context *ctdb,
} }
/* Check if this is a public ip known to the node, i.e. can that /* Can the given node host the given IP: is the public IP known to the
node takeover this ip ? * node and is NOIPHOST unset?
*/ */
static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
struct ctdb_public_ip_list *ip) struct ctdb_node_map *nodemap,
struct ctdb_public_ip_list *ip)
{ {
struct ctdb_all_public_ips *public_ips; struct ctdb_all_public_ips *public_ips;
int i; int i;
if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPHOST) {
return false;
}
public_ips = ctdb->nodes[pnn]->available_public_ips; public_ips = ctdb->nodes[pnn]->available_public_ips;
if (public_ips == NULL) { if (public_ips == NULL) {
return -1; return false;
} }
for (i=0;i<public_ips->num;i++) { for (i=0;i<public_ips->num;i++) {
if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) { if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
/* yes, this node can serve this public ip */ /* yes, this node can serve this public ip */
return 0; return true;
} }
} }
return -1; return false;
} }
static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
struct ctdb_node_map *nodemap,
struct ctdb_public_ip_list *ip)
{
if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPTAKEOVER) {
return false;
}
return can_node_host_ip(ctdb, pnn, nodemap, ip);
}
/* search the node lists list for a node to takeover this ip. /* search the node lists list for a node to takeover this ip.
pick the node that currently are serving the least number of ips pick the node that currently are serving the least number of ips
@ -1339,21 +1354,8 @@ static int find_takeover_node(struct ctdb_context *ctdb,
pnn = -1; pnn = -1;
for (i=0;i<nodemap->num;i++) { for (i=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
/* This node is not allowed to takeover any addresses
*/
continue;
}
if (nodemap->nodes[i].flags & mask) {
/* This node is not healty and can not be used to serve
a public address
*/
continue;
}
/* verify that this node can serve this ip */ /* verify that this node can serve this ip */
if (can_node_serve_ip(ctdb, i, ip)) { if (!can_node_takeover_ip(ctdb, i, nodemap, ip)) {
/* no it couldnt so skip to the next node */ /* no it couldnt so skip to the next node */
continue; continue;
} }
@ -1642,17 +1644,8 @@ try_again:
maxnode = -1; maxnode = -1;
minnode = -1; minnode = -1;
for (i=0;i<nodemap->num;i++) { for (i=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & mask) {
continue;
}
/* Only check nodes that are allowed to takeover an ip */
if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
continue;
}
/* only check nodes that can actually serve this ip */ /* only check nodes that can actually serve this ip */
if (can_node_serve_ip(ctdb, i, tmp_ip)) { if (!can_node_takeover_ip(ctdb, i, nodemap, tmp_ip)) {
/* no it couldnt so skip to the next node */ /* no it couldnt so skip to the next node */
continue; continue;
} }
@ -1815,19 +1808,12 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
} }
for (dstnode=0; dstnode < nodemap->num; dstnode++) { for (dstnode=0; dstnode < nodemap->num; dstnode++) {
/* Only check nodes that are allowed to takeover an ip */ /* only check nodes that can actually takeover this ip */
if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) { if (!can_node_takeover_ip(ctdb, dstnode,
continue; nodemap, tmp_ip)) {
}
/* only check nodes that can actually serve this ip */
if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
/* no it couldnt so skip to the next node */ /* no it couldnt so skip to the next node */
continue; continue;
} }
if (nodemap->nodes[dstnode].flags & mask) {
continue;
}
dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode); dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
dstimbl = lcp2_imbalances[dstnode] + dstdsum; dstimbl = lcp2_imbalances[dstnode] + dstdsum;
@ -1929,13 +1915,9 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
continue; continue;
} }
/* Only check nodes that are allowed to takeover an ip */ /* only check nodes that can actually takeover this ip */
if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) { if (!can_node_takeover_ip(ctdb, dstnode,
continue; nodemap, tmp_ip)) {
}
/* only check nodes that can actually serve this ip */
if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
/* no it couldnt so skip to the next node */ /* no it couldnt so skip to the next node */
continue; continue;
} }
@ -2075,21 +2057,6 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
{ {
struct ctdb_public_ip_list *tmp_ip; struct ctdb_public_ip_list *tmp_ip;
/* mark all public addresses with a masked node as being served by
node -1
*/
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
continue;
}
if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
ctdb_addr_to_str(&(tmp_ip->addr)),
tmp_ip->pnn));
tmp_ip->pnn = -1;
}
}
/* verify that the assigned nodes can serve that public ip /* verify that the assigned nodes can serve that public ip
and set it to -1 if not and set it to -1 if not
*/ */
@ -2097,7 +2064,8 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
if (tmp_ip->pnn == -1) { if (tmp_ip->pnn == -1) {
continue; continue;
} }
if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) { if (!can_node_host_ip(ctdb, tmp_ip->pnn,
nodemap, tmp_ip) != 0) {
/* this node can not serve this ip. */ /* this node can not serve this ip. */
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n", DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
ctdb_addr_to_str(&(tmp_ip->addr)), ctdb_addr_to_str(&(tmp_ip->addr)),
@ -2224,7 +2192,7 @@ static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
*/ */
mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED; mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
if (all_nodes_are_disabled(nodemap) && if (all_nodes_are_disabled(nodemap) &&
(ctdb->tunable.no_ip_takeover_on_disabled == 0)) { (ctdb->tunable.no_ip_host_on_all_disabled == 0)) {
/* We didnt have any completely healthy nodes so /* We didnt have any completely healthy nodes so
use "disabled" nodes as a fallback use "disabled" nodes as a fallback
*/ */
@ -2332,22 +2300,54 @@ static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
/* Set internal flags for IP allocation: /* Set internal flags for IP allocation:
* Clear ip flags * Clear ip flags
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable * Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
* Set NOIPHOST ip flag for each INACTIVE node
* if all nodes are disabled:
* Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
* else
* Set NOIPHOST ip flags for disabled nodes
*/ */
static void set_ipflags_internal(struct ctdb_node_map *nodemap, static void set_ipflags_internal(struct ctdb_node_map *nodemap,
uint32_t *tval_noiptakeover) uint32_t *tval_noiptakeover,
uint32_t *tval_noiphostonalldisabled)
{ {
int i; int i;
/* Clear IP flags */ /* Clear IP flags */
for (i=0;i<nodemap->num;i++) { for (i=0;i<nodemap->num;i++) {
nodemap->nodes[i].flags &= ~NODE_FLAGS_NOIPTAKEOVER; nodemap->nodes[i].flags &=
~(NODE_FLAGS_NOIPTAKEOVER|NODE_FLAGS_NOIPHOST);
} }
/* Can not take IPs on node with NoIPTakeover set */
for (i=0;i<nodemap->num;i++) { for (i=0;i<nodemap->num;i++) {
/* Can not take IPs on node with NoIPTakeover set */
if (tval_noiptakeover[i] != 0) { if (tval_noiptakeover[i] != 0) {
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER; nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER;
} }
/* Can not host IPs on INACTIVE node */
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
}
}
if (all_nodes_are_disabled(nodemap)) {
/* If all nodes are disabled, can not host IPs on node
* with NoIPHostOnAllDisabled set
*/
for (i=0;i<nodemap->num;i++) {
if (tval_noiphostonalldisabled[i] != 0) {
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
}
}
} else {
/* If some nodes are not disabled, then can not host
* IPs on DISABLED node
*/
for (i=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
}
}
} }
} }
@ -2356,6 +2356,7 @@ static bool set_ipflags(struct ctdb_context *ctdb,
struct ctdb_node_map *nodemap) struct ctdb_node_map *nodemap)
{ {
uint32_t *tval_noiptakeover; uint32_t *tval_noiptakeover;
uint32_t *tval_noiphostonalldisabled;
tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap, tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
"NoIPTakeover"); "NoIPTakeover");
@ -2363,9 +2364,18 @@ static bool set_ipflags(struct ctdb_context *ctdb,
return false; return false;
} }
set_ipflags_internal(nodemap, tval_noiptakeover); tval_noiphostonalldisabled =
get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
"NoIPHostOnAllDisabled");
if (tval_noiphostonalldisabled == NULL) {
return false;
}
set_ipflags_internal(nodemap,
tval_noiptakeover, tval_noiphostonalldisabled);
talloc_free(tval_noiptakeover); talloc_free(tval_noiptakeover);
talloc_free(tval_noiphostonalldisabled);
return true; return true;
} }
@ -2396,7 +2406,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
goto ipreallocated; goto ipreallocated;
} }
if (!set_ipflags(ctdb, tmp_ctx, nodemap)) { if (!set_ipflags(ctdb, tmp_ctx, nodemap)) {
DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n")); DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n"));
return -1; return -1;

View File

@ -84,7 +84,7 @@ static const struct {
{ "DBRecordSizeWarn", 10000000, offsetof(struct ctdb_tunable, db_record_size_warn), false }, { "DBRecordSizeWarn", 10000000, offsetof(struct ctdb_tunable, db_record_size_warn), false },
{ "DBSizeWarn", 100000000, offsetof(struct ctdb_tunable, db_size_warn), false }, { "DBSizeWarn", 100000000, offsetof(struct ctdb_tunable, db_size_warn), false },
{ "PullDBPreallocation", 10*1024*1024, offsetof(struct ctdb_tunable, pulldb_preallocation_size), false }, { "PullDBPreallocation", 10*1024*1024, offsetof(struct ctdb_tunable, pulldb_preallocation_size), false },
{ "NoIPTakeoverOnDisabled", 0, offsetof(struct ctdb_tunable, no_ip_takeover_on_disabled), false }, { "NoIPHostOnAllDisabled", 0, offsetof(struct ctdb_tunable, no_ip_host_on_all_disabled), false },
{ "DeadlockTimeout", 300, offsetof(struct ctdb_tunable, deadlock_timeout), false }, { "DeadlockTimeout", 300, offsetof(struct ctdb_tunable, deadlock_timeout), false },
{ "Samba3AvoidDeadlocks", 0, offsetof(struct ctdb_tunable, samba3_hack), false }, { "Samba3AvoidDeadlocks", 0, offsetof(struct ctdb_tunable, samba3_hack), false },
}; };

View File

@ -363,6 +363,7 @@ void ctdb_test_init(const char nodestates[],
uint32_t nodeflags[CTDB_TEST_MAX_NODES]; uint32_t nodeflags[CTDB_TEST_MAX_NODES];
char *tok, *ns, *t; char *tok, *ns, *t;
uint32_t *tval_noiptakeover; uint32_t *tval_noiptakeover;
uint32_t *tval_noiptakeoverondisabled;
*ctdb = talloc_zero(NULL, struct ctdb_context); *ctdb = talloc_zero(NULL, struct ctdb_context);
@ -400,13 +401,11 @@ void ctdb_test_init(const char nodestates[],
} }
} }
(*ctdb)->tunable.no_ip_takeover_on_disabled = 0;
if (getenv("CTDB_SET_NoIPTakeoverOnDisabled")) {
(*ctdb)->tunable.no_ip_takeover_on_disabled = (uint32_t) strtoul(getenv("CTDB_SET_NoIPTakeoverOnDisabled"), NULL, 0);
}
tval_noiptakeover = get_tunable_values(*ctdb, numnodes, tval_noiptakeover = get_tunable_values(*ctdb, numnodes,
"CTDB_SET_NoIPTakeover"); "CTDB_SET_NoIPTakeover");
tval_noiptakeoverondisabled =
get_tunable_values(*ctdb, numnodes,
"CTDB_SET_NoIPHostOnAllDisabled");
*nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes); *nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes);
(*nodemap)->num = numnodes; (*nodemap)->num = numnodes;
@ -427,7 +426,7 @@ void ctdb_test_init(const char nodestates[],
(*ctdb)->nodes[i]->known_public_ips = avail[i]; (*ctdb)->nodes[i]->known_public_ips = avail[i];
} }
set_ipflags_internal(*nodemap, tval_noiptakeover); set_ipflags_internal(*nodemap, tval_noiptakeover, tval_noiptakeoverondisabled);
} }
/* IP layout is read from stdin. */ /* IP layout is read from stdin. */

View File

@ -2,10 +2,10 @@
. "${TEST_SCRIPTS_DIR}/unit.sh" . "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "3 nodes, no IPs assigned, all unhealthy, NoIPTakeoverOnDisabled" define_test "3 nodes, no IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
export CTDB_TEST_LOGLEVEL=0 export CTDB_TEST_LOGLEVEL=0
export CTDB_SET_NoIPTakeoverOnDisabled=1 export CTDB_SET_NoIPHostOnAllDisabled=1
required_result <<EOF required_result <<EOF
192.168.21.254 -1 192.168.21.254 -1

View File

@ -2,10 +2,10 @@
. "${TEST_SCRIPTS_DIR}/unit.sh" . "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "3 nodes, all IPs assigned, all unhealthy, NoIPTakeoverOnDisabled" define_test "3 nodes, all IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
export CTDB_TEST_LOGLEVEL=0 export CTDB_TEST_LOGLEVEL=0
export CTDB_SET_NoIPTakeoverOnDisabled=1 export CTDB_SET_NoIPHostOnAllDisabled=1
required_result <<EOF required_result <<EOF
192.168.21.254 -1 192.168.21.254 -1

View File

@ -2,10 +2,10 @@
. "${TEST_SCRIPTS_DIR}/unit.sh" . "${TEST_SCRIPTS_DIR}/unit.sh"
define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPTakeoverOnDisabled" define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPHostOnAllDisabled"
export CTDB_TEST_LOGLEVEL=0 export CTDB_TEST_LOGLEVEL=0
export CTDB_SET_NoIPTakeoverOnDisabled=1 export CTDB_SET_NoIPHostOnAllDisabled=1
required_result <<EOF required_result <<EOF
192.168.21.254 -1 192.168.21.254 -1