mirror of
https://github.com/samba-team/samba.git
synced 2025-01-08 21:18:16 +03:00
recoverd: Fix tunable NoIPTakeoverOnDisabled, rename to NoIPHostOnAllDisabled
This really needs to be per-node. The rename is because nodes with this tunable switched on should drop IPs if they become unhealthy (or disabled in some other way). * Add new flag NODE_FLAGS_NOIPHOST, only used in recovery daemon. * Enhance set_ipflags_internal() and set_ipflags() to setup NODE_FLAGS_NOIPHOST depending on setting of NoIPHostOnAllDisabled and/or whether nodes are disabled/inactive. * Replace can_node_servce_ip() with functions can_node_host_ip() and can_node_takeover_ip(). These functions are the only ones that need to look at NODE_FLAGS_NOIPTAKEOVER and NODE_FLAGS_NOIPHOST. They can make the decision without looking at any other flags due to previous setup. * Remove explicit flag checking in IP allocation functions (including unassign_unsuitable_ips()) and just call can_node_host_ip() and can_node_takeover_ip() as appropriate. * Update test code to handle CTDB_SET_NoIPHostOnAllDisabled. Signed-off-by: Martin Schwenke <martin@meltin.net> Pair-programmed-with: Amitay Isaacs <amitay@gmail.com> (This used to be ctdb commit 1308a51f73f2e29ba4dbebb6111d9309a89732cc)
This commit is contained in:
parent
ac80824709
commit
0445c988e2
@ -862,15 +862,16 @@
|
||||
</para>
|
||||
</refsect2>
|
||||
|
||||
<refsect2><title>NoIPTakeoverOnDisabled</title>
|
||||
<refsect2><title>NoIPHostOnAllDisabled</title>
|
||||
<para>Default: 0</para>
|
||||
<para>
|
||||
If no nodes are healthy then by default ctdb will happily host
|
||||
public IPs on disabled (unhealthy or administratively disabled)
|
||||
nodes. This can cause problems, for example if the underlying
|
||||
cluster filesystem is not mounted. When set to 1 this behaviour
|
||||
is switched off and disabled nodes will not be able to takeover
|
||||
IPs.
|
||||
cluster filesystem is not mounted. When set to 1 on a node and
|
||||
that node is disabled it, any IPs hosted by this node will be
|
||||
released and the node will not takeover any IPs until it is no
|
||||
longer disabled.
|
||||
</para>
|
||||
</refsect2>
|
||||
|
||||
|
@ -135,7 +135,7 @@ struct ctdb_tunable {
|
||||
uint32_t db_record_size_warn;
|
||||
uint32_t db_size_warn;
|
||||
uint32_t pulldb_preallocation_size;
|
||||
uint32_t no_ip_takeover_on_disabled;
|
||||
uint32_t no_ip_host_on_all_disabled;
|
||||
uint32_t deadlock_timeout;
|
||||
uint32_t samba3_hack;
|
||||
};
|
||||
|
@ -584,7 +584,9 @@ struct ctdb_node_map {
|
||||
#define NODE_FLAGS_DISABLED (NODE_FLAGS_UNHEALTHY|NODE_FLAGS_PERMANENTLY_DISABLED)
|
||||
#define NODE_FLAGS_INACTIVE (NODE_FLAGS_DELETED|NODE_FLAGS_DISCONNECTED|NODE_FLAGS_BANNED|NODE_FLAGS_STOPPED)
|
||||
|
||||
#define NODE_FLAGS_NOIPTAKEOVER 0x01000000 /* this node can takeover any new ip addresses, this flag is ONLY valid within the recovery daemon */
|
||||
/* These flags are ONLY valid within the recovery daemon */
|
||||
#define NODE_FLAGS_NOIPTAKEOVER 0x01000000 /* can not takeover additional IPs */
|
||||
#define NODE_FLAGS_NOIPHOST 0x02000000 /* can not host IPs */
|
||||
|
||||
|
||||
/*
|
||||
|
@ -1299,31 +1299,46 @@ static int node_ip_coverage(struct ctdb_context *ctdb,
|
||||
}
|
||||
|
||||
|
||||
/* Check if this is a public ip known to the node, i.e. can that
|
||||
node takeover this ip ?
|
||||
/* Can the given node host the given IP: is the public IP known to the
|
||||
* node and is NOIPHOST unset?
|
||||
*/
|
||||
static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
|
||||
struct ctdb_public_ip_list *ip)
|
||||
static bool can_node_host_ip(struct ctdb_context *ctdb, int32_t pnn,
|
||||
struct ctdb_node_map *nodemap,
|
||||
struct ctdb_public_ip_list *ip)
|
||||
{
|
||||
struct ctdb_all_public_ips *public_ips;
|
||||
int i;
|
||||
|
||||
if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPHOST) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public_ips = ctdb->nodes[pnn]->available_public_ips;
|
||||
|
||||
if (public_ips == NULL) {
|
||||
return -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i=0;i<public_ips->num;i++) {
|
||||
if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
|
||||
/* yes, this node can serve this public ip */
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool can_node_takeover_ip(struct ctdb_context *ctdb, int32_t pnn,
|
||||
struct ctdb_node_map *nodemap,
|
||||
struct ctdb_public_ip_list *ip)
|
||||
{
|
||||
if (nodemap->nodes[pnn].flags & NODE_FLAGS_NOIPTAKEOVER) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return can_node_host_ip(ctdb, pnn, nodemap, ip);
|
||||
}
|
||||
|
||||
/* search the node lists list for a node to takeover this ip.
|
||||
pick the node that currently are serving the least number of ips
|
||||
@ -1339,21 +1354,8 @@ static int find_takeover_node(struct ctdb_context *ctdb,
|
||||
|
||||
pnn = -1;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
|
||||
/* This node is not allowed to takeover any addresses
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nodemap->nodes[i].flags & mask) {
|
||||
/* This node is not healty and can not be used to serve
|
||||
a public address
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
/* verify that this node can serve this ip */
|
||||
if (can_node_serve_ip(ctdb, i, ip)) {
|
||||
if (!can_node_takeover_ip(ctdb, i, nodemap, ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
@ -1642,17 +1644,8 @@ try_again:
|
||||
maxnode = -1;
|
||||
minnode = -1;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & mask) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Only check nodes that are allowed to takeover an ip */
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_NOIPTAKEOVER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* only check nodes that can actually serve this ip */
|
||||
if (can_node_serve_ip(ctdb, i, tmp_ip)) {
|
||||
if (!can_node_takeover_ip(ctdb, i, nodemap, tmp_ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
@ -1815,19 +1808,12 @@ static void lcp2_allocate_unassigned(struct ctdb_context *ctdb,
|
||||
}
|
||||
|
||||
for (dstnode=0; dstnode < nodemap->num; dstnode++) {
|
||||
/* Only check nodes that are allowed to takeover an ip */
|
||||
if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* only check nodes that can actually serve this ip */
|
||||
if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
|
||||
/* only check nodes that can actually takeover this ip */
|
||||
if (!can_node_takeover_ip(ctdb, dstnode,
|
||||
nodemap, tmp_ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[dstnode].flags & mask) {
|
||||
continue;
|
||||
}
|
||||
|
||||
dstdsum = ip_distance_2_sum(&(tmp_ip->addr), all_ips, dstnode);
|
||||
dstimbl = lcp2_imbalances[dstnode] + dstdsum;
|
||||
@ -1929,13 +1915,9 @@ static bool lcp2_failback_candidate(struct ctdb_context *ctdb,
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Only check nodes that are allowed to takeover an ip */
|
||||
if (nodemap->nodes[dstnode].flags & NODE_FLAGS_NOIPTAKEOVER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* only check nodes that can actually serve this ip */
|
||||
if (can_node_serve_ip(ctdb, dstnode, tmp_ip)) {
|
||||
/* only check nodes that can actually takeover this ip */
|
||||
if (!can_node_takeover_ip(ctdb, dstnode,
|
||||
nodemap, tmp_ip)) {
|
||||
/* no it couldnt so skip to the next node */
|
||||
continue;
|
||||
}
|
||||
@ -2075,21 +2057,6 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
|
||||
{
|
||||
struct ctdb_public_ip_list *tmp_ip;
|
||||
|
||||
/* mark all public addresses with a masked node as being served by
|
||||
node -1
|
||||
*/
|
||||
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
|
||||
if (tmp_ip->pnn == -1) {
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
|
||||
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
|
||||
ctdb_addr_to_str(&(tmp_ip->addr)),
|
||||
tmp_ip->pnn));
|
||||
tmp_ip->pnn = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* verify that the assigned nodes can serve that public ip
|
||||
and set it to -1 if not
|
||||
*/
|
||||
@ -2097,7 +2064,8 @@ static void unassign_unsuitable_ips(struct ctdb_context *ctdb,
|
||||
if (tmp_ip->pnn == -1) {
|
||||
continue;
|
||||
}
|
||||
if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
|
||||
if (!can_node_host_ip(ctdb, tmp_ip->pnn,
|
||||
nodemap, tmp_ip) != 0) {
|
||||
/* this node can not serve this ip. */
|
||||
DEBUG(DEBUG_DEBUG,("Unassign IP: %s from %d\n",
|
||||
ctdb_addr_to_str(&(tmp_ip->addr)),
|
||||
@ -2224,7 +2192,7 @@ static void ctdb_takeover_run_core(struct ctdb_context *ctdb,
|
||||
*/
|
||||
mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
|
||||
if (all_nodes_are_disabled(nodemap) &&
|
||||
(ctdb->tunable.no_ip_takeover_on_disabled == 0)) {
|
||||
(ctdb->tunable.no_ip_host_on_all_disabled == 0)) {
|
||||
/* We didnt have any completely healthy nodes so
|
||||
use "disabled" nodes as a fallback
|
||||
*/
|
||||
@ -2332,22 +2300,54 @@ static uint32_t *get_tunable_from_nodes(struct ctdb_context *ctdb,
|
||||
/* Set internal flags for IP allocation:
|
||||
* Clear ip flags
|
||||
* Set NOIPTAKOVER ip flags from per-node NoIPTakeover tunable
|
||||
* Set NOIPHOST ip flag for each INACTIVE node
|
||||
* if all nodes are disabled:
|
||||
* Set NOIPHOST ip flags from per-node NoIPHostOnAllDisabled tunable
|
||||
* else
|
||||
* Set NOIPHOST ip flags for disabled nodes
|
||||
*/
|
||||
static void set_ipflags_internal(struct ctdb_node_map *nodemap,
|
||||
uint32_t *tval_noiptakeover)
|
||||
uint32_t *tval_noiptakeover,
|
||||
uint32_t *tval_noiphostonalldisabled)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Clear IP flags */
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
nodemap->nodes[i].flags &= ~NODE_FLAGS_NOIPTAKEOVER;
|
||||
nodemap->nodes[i].flags &=
|
||||
~(NODE_FLAGS_NOIPTAKEOVER|NODE_FLAGS_NOIPHOST);
|
||||
}
|
||||
|
||||
/* Can not take IPs on node with NoIPTakeover set */
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
/* Can not take IPs on node with NoIPTakeover set */
|
||||
if (tval_noiptakeover[i] != 0) {
|
||||
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPTAKEOVER;
|
||||
}
|
||||
|
||||
/* Can not host IPs on INACTIVE node */
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
|
||||
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
|
||||
}
|
||||
}
|
||||
|
||||
if (all_nodes_are_disabled(nodemap)) {
|
||||
/* If all nodes are disabled, can not host IPs on node
|
||||
* with NoIPHostOnAllDisabled set
|
||||
*/
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (tval_noiphostonalldisabled[i] != 0) {
|
||||
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* If some nodes are not disabled, then can not host
|
||||
* IPs on DISABLED node
|
||||
*/
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISABLED) {
|
||||
nodemap->nodes[i].flags |= NODE_FLAGS_NOIPHOST;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2356,6 +2356,7 @@ static bool set_ipflags(struct ctdb_context *ctdb,
|
||||
struct ctdb_node_map *nodemap)
|
||||
{
|
||||
uint32_t *tval_noiptakeover;
|
||||
uint32_t *tval_noiphostonalldisabled;
|
||||
|
||||
tval_noiptakeover = get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
|
||||
"NoIPTakeover");
|
||||
@ -2363,9 +2364,18 @@ static bool set_ipflags(struct ctdb_context *ctdb,
|
||||
return false;
|
||||
}
|
||||
|
||||
set_ipflags_internal(nodemap, tval_noiptakeover);
|
||||
tval_noiphostonalldisabled =
|
||||
get_tunable_from_nodes(ctdb, tmp_ctx, nodemap,
|
||||
"NoIPHostOnAllDisabled");
|
||||
if (tval_noiphostonalldisabled == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
set_ipflags_internal(nodemap,
|
||||
tval_noiptakeover, tval_noiphostonalldisabled);
|
||||
|
||||
talloc_free(tval_noiptakeover);
|
||||
talloc_free(tval_noiphostonalldisabled);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -2396,7 +2406,6 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
|
||||
goto ipreallocated;
|
||||
}
|
||||
|
||||
|
||||
if (!set_ipflags(ctdb, tmp_ctx, nodemap)) {
|
||||
DEBUG(DEBUG_ERR,("Failed to set IP flags from tunables\n"));
|
||||
return -1;
|
||||
|
@ -84,7 +84,7 @@ static const struct {
|
||||
{ "DBRecordSizeWarn", 10000000, offsetof(struct ctdb_tunable, db_record_size_warn), false },
|
||||
{ "DBSizeWarn", 100000000, offsetof(struct ctdb_tunable, db_size_warn), false },
|
||||
{ "PullDBPreallocation", 10*1024*1024, offsetof(struct ctdb_tunable, pulldb_preallocation_size), false },
|
||||
{ "NoIPTakeoverOnDisabled", 0, offsetof(struct ctdb_tunable, no_ip_takeover_on_disabled), false },
|
||||
{ "NoIPHostOnAllDisabled", 0, offsetof(struct ctdb_tunable, no_ip_host_on_all_disabled), false },
|
||||
{ "DeadlockTimeout", 300, offsetof(struct ctdb_tunable, deadlock_timeout), false },
|
||||
{ "Samba3AvoidDeadlocks", 0, offsetof(struct ctdb_tunable, samba3_hack), false },
|
||||
};
|
||||
|
@ -363,6 +363,7 @@ void ctdb_test_init(const char nodestates[],
|
||||
uint32_t nodeflags[CTDB_TEST_MAX_NODES];
|
||||
char *tok, *ns, *t;
|
||||
uint32_t *tval_noiptakeover;
|
||||
uint32_t *tval_noiptakeoverondisabled;
|
||||
|
||||
*ctdb = talloc_zero(NULL, struct ctdb_context);
|
||||
|
||||
@ -400,13 +401,11 @@ void ctdb_test_init(const char nodestates[],
|
||||
}
|
||||
}
|
||||
|
||||
(*ctdb)->tunable.no_ip_takeover_on_disabled = 0;
|
||||
if (getenv("CTDB_SET_NoIPTakeoverOnDisabled")) {
|
||||
(*ctdb)->tunable.no_ip_takeover_on_disabled = (uint32_t) strtoul(getenv("CTDB_SET_NoIPTakeoverOnDisabled"), NULL, 0);
|
||||
}
|
||||
|
||||
tval_noiptakeover = get_tunable_values(*ctdb, numnodes,
|
||||
"CTDB_SET_NoIPTakeover");
|
||||
tval_noiptakeoverondisabled =
|
||||
get_tunable_values(*ctdb, numnodes,
|
||||
"CTDB_SET_NoIPHostOnAllDisabled");
|
||||
|
||||
*nodemap = talloc_array(*ctdb, struct ctdb_node_map, numnodes);
|
||||
(*nodemap)->num = numnodes;
|
||||
@ -427,7 +426,7 @@ void ctdb_test_init(const char nodestates[],
|
||||
(*ctdb)->nodes[i]->known_public_ips = avail[i];
|
||||
}
|
||||
|
||||
set_ipflags_internal(*nodemap, tval_noiptakeover);
|
||||
set_ipflags_internal(*nodemap, tval_noiptakeover, tval_noiptakeoverondisabled);
|
||||
}
|
||||
|
||||
/* IP layout is read from stdin. */
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "3 nodes, no IPs assigned, all unhealthy, NoIPTakeoverOnDisabled"
|
||||
define_test "3 nodes, no IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
|
||||
|
||||
export CTDB_TEST_LOGLEVEL=0
|
||||
export CTDB_SET_NoIPTakeoverOnDisabled=1
|
||||
export CTDB_SET_NoIPHostOnAllDisabled=1
|
||||
|
||||
required_result <<EOF
|
||||
192.168.21.254 -1
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "3 nodes, all IPs assigned, all unhealthy, NoIPTakeoverOnDisabled"
|
||||
define_test "3 nodes, all IPs assigned, all unhealthy, NoIPHostOnAllDisabled"
|
||||
|
||||
export CTDB_TEST_LOGLEVEL=0
|
||||
export CTDB_SET_NoIPTakeoverOnDisabled=1
|
||||
export CTDB_SET_NoIPHostOnAllDisabled=1
|
||||
|
||||
required_result <<EOF
|
||||
192.168.21.254 -1
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
. "${TEST_SCRIPTS_DIR}/unit.sh"
|
||||
|
||||
define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPTakeoverOnDisabled"
|
||||
define_test "3 nodes, all IPs assigned, 2->3 unhealthy, NoIPHostOnAllDisabled"
|
||||
|
||||
export CTDB_TEST_LOGLEVEL=0
|
||||
export CTDB_SET_NoIPTakeoverOnDisabled=1
|
||||
export CTDB_SET_NoIPHostOnAllDisabled=1
|
||||
|
||||
required_result <<EOF
|
||||
192.168.21.254 -1
|
||||
|
Loading…
Reference in New Issue
Block a user