mirror of
https://github.com/samba-team/samba.git
synced 2025-01-03 01:18:10 +03:00
ctdb-tool: Add UNKNOWN pseudo state
When a node is starting, CTDB reports remote nodes as unhealthy by default. This can be misleading. To hide this, report an "UNKNOWN" pseudo state when a remote node is not disconnected and the runstate is less than or equal to "FIRST_RECOVERY". Signed-off-by: Vinit Agnihotri <vagnihotri@ddn.com> Signed-off-by: Martin Schwenke <martin@meltin.net> Reviewed-by: Amitay Isaacs <amitay@gmail.com>
This commit is contained in:
parent
428bc71f98
commit
794f125802
@ -25,9 +25,9 @@ EOF
|
||||
simple_test all
|
||||
|
||||
required_result 0 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|Y|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
|
||||
EOF
|
||||
simple_test -X all
|
||||
|
@ -25,9 +25,9 @@ EOF
|
||||
simple_test all
|
||||
|
||||
required_result 1 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|N|
|
||||
|1|192.168.20.42|1|0|0|0|0|1|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|Y|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|0|N|
|
||||
|1|192.168.20.42|1|0|0|0|0|0|1|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
|
||||
EOF
|
||||
simple_test -X all
|
||||
|
@ -25,9 +25,9 @@ EOF
|
||||
simple_test all
|
||||
|
||||
required_result 2 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|1|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|Y|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
|
||||
EOF
|
||||
simple_test -X all
|
||||
|
@ -22,7 +22,7 @@ EOF
|
||||
simple_test
|
||||
|
||||
required_result 0 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|Y|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|Y|
|
||||
EOF
|
||||
simple_test -X
|
||||
|
@ -22,7 +22,7 @@ EOF
|
||||
simple_test 0
|
||||
|
||||
required_result 2 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|1|0|0|0|N|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
|
||||
EOF
|
||||
simple_test -X 0
|
||||
|
@ -22,8 +22,8 @@ EOF
|
||||
simple_test 0
|
||||
|
||||
required_result 36 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|1|0|1|1|0|N|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|1|0|1|1|0|N|
|
||||
EOF
|
||||
simple_test -X 0
|
||||
|
||||
|
@ -38,9 +38,9 @@ EOF
|
||||
simple_test
|
||||
|
||||
required_result 0 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|Y|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|N|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|0|0|0|0|Y|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|0|N|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|N|
|
||||
EOF
|
||||
simple_test -X
|
||||
|
@ -38,9 +38,9 @@ EOF
|
||||
simple_test
|
||||
|
||||
required_result 0 <<EOF
|
||||
|Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|1|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|Y|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|N|
|
||||
|Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode|
|
||||
|0|192.168.20.41|0|0|0|0|1|0|0|0|N|
|
||||
|1|192.168.20.42|0|0|0|0|0|0|0|0|Y|
|
||||
|2|192.168.20.43|0|0|0|0|0|0|0|0|N|
|
||||
EOF
|
||||
simple_test -X
|
||||
|
@ -446,16 +446,16 @@ node_has_status ()
|
||||
|
||||
local bits
|
||||
case "$status" in
|
||||
unhealthy) bits="?|?|?|1|*" ;;
|
||||
healthy) bits="?|?|?|0|*" ;;
|
||||
unhealthy) bits="?|?|?|?|1|*" ;;
|
||||
healthy) bits="?|?|?|?|0|*" ;;
|
||||
disconnected) bits="1|*" ;;
|
||||
connected) bits="0|*" ;;
|
||||
banned) bits="?|1|*" ;;
|
||||
unbanned) bits="?|0|*" ;;
|
||||
disabled) bits="?|?|1|*" ;;
|
||||
enabled) bits="?|?|0|*" ;;
|
||||
stopped) bits="?|?|?|?|1|*" ;;
|
||||
notstopped) bits="?|?|?|?|0|*" ;;
|
||||
banned) bits="?|?|1|*" ;;
|
||||
unbanned) bits="?|?|0|*" ;;
|
||||
disabled) bits="?|?|?|1|*" ;;
|
||||
enabled) bits="?|?|?|0|*" ;;
|
||||
stopped) bits="?|?|?|?|?|1|*" ;;
|
||||
notstopped) bits="?|?|?|?|?|0|*" ;;
|
||||
*)
|
||||
echo "node_has_status: unknown status \"$status\""
|
||||
return 1
|
||||
|
@ -52,6 +52,8 @@
|
||||
#define SRVID_CTDB_TOOL (CTDB_SRVID_TOOL_RANGE | 0x0001000000000000LL)
|
||||
#define SRVID_CTDB_PUSHDB (CTDB_SRVID_TOOL_RANGE | 0x0002000000000000LL)
|
||||
|
||||
#define NODE_FLAGS_UNKNOWN 0x00000040
|
||||
|
||||
static struct {
|
||||
const char *debuglevelstr;
|
||||
int timelimit;
|
||||
@ -111,6 +113,7 @@ static const char *pretty_print_flags(TALLOC_CTX *mem_ctx, uint32_t flags)
|
||||
const char *name;
|
||||
} flag_names[] = {
|
||||
{ NODE_FLAGS_DISCONNECTED, "DISCONNECTED" },
|
||||
{ NODE_FLAGS_UNKNOWN, "UNKNOWN" },
|
||||
{ NODE_FLAGS_PERMANENTLY_DISABLED, "DISABLED" },
|
||||
{ NODE_FLAGS_BANNED, "BANNED" },
|
||||
{ NODE_FLAGS_UNHEALTHY, "UNHEALTHY" },
|
||||
@ -367,6 +370,64 @@ done:
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remote nodes are initialised as UNHEALTHY in the daemon and their
|
||||
* true status is udpated after they are connected. However, there
|
||||
* is a small window when a healthy node may be shown as unhealthy
|
||||
* between connecting and the status update. Hide this for nodes
|
||||
* that are not DISCONNECTED nodes by reporting them as UNKNOWN until
|
||||
* the runstate passes FIRST_RECOVERY. Code paths where this is used
|
||||
* do not make any control decisions depending upon unknown/unhealthy
|
||||
* state.
|
||||
*/
|
||||
static struct ctdb_node_map *get_nodemap_unknown(
|
||||
TALLOC_CTX *mem_ctx,
|
||||
struct ctdb_context *ctdb,
|
||||
struct ctdb_node_map *nodemap_in)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret;
|
||||
enum ctdb_runstate runstate;
|
||||
struct ctdb_node_map *nodemap;
|
||||
|
||||
ret = ctdb_ctrl_get_runstate(mem_ctx,
|
||||
ctdb->ev,
|
||||
ctdb->client,
|
||||
ctdb->cmd_pnn,
|
||||
TIMEOUT(),
|
||||
&runstate);
|
||||
if (ret != 0 ) {
|
||||
printf("Unable to get runstate");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
nodemap = talloc_nodemap(mem_ctx, nodemap_in);
|
||||
if (nodemap == NULL) {
|
||||
printf("Unable to get nodemap");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
nodemap->num = nodemap_in->num;
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
struct ctdb_node_and_flags *node_in = &nodemap_in->node[i];
|
||||
struct ctdb_node_and_flags *node = &nodemap->node[i];
|
||||
|
||||
*node = *node_in;
|
||||
|
||||
if (node->flags & NODE_FLAGS_DELETED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((runstate <= CTDB_RUNSTATE_FIRST_RECOVERY) &&
|
||||
!(node->flags & NODE_FLAGS_DISCONNECTED) &&
|
||||
(node->pnn != ctdb->cmd_pnn)) {
|
||||
node->flags = NODE_FLAGS_UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
return nodemap;
|
||||
}
|
||||
|
||||
/* Compare IP address */
|
||||
static bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
|
||||
{
|
||||
@ -826,11 +887,12 @@ static void print_nodemap_machine(TALLOC_CTX *mem_ctx,
|
||||
struct ctdb_node_and_flags *node;
|
||||
unsigned int i;
|
||||
|
||||
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
|
||||
options.sep,
|
||||
"Node", options.sep,
|
||||
"IP", options.sep,
|
||||
"Disconnected", options.sep,
|
||||
"Unknown", options.sep,
|
||||
"Banned", options.sep,
|
||||
"Disabled", options.sep,
|
||||
"Unhealthy", options.sep,
|
||||
@ -845,12 +907,13 @@ static void print_nodemap_machine(TALLOC_CTX *mem_ctx,
|
||||
continue;
|
||||
}
|
||||
|
||||
printf("%s%u%s%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%c%s\n",
|
||||
printf("%s%u%s%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%c%s\n",
|
||||
options.sep,
|
||||
node->pnn, options.sep,
|
||||
ctdb_sock_addr_to_string(mem_ctx, &node->addr, false),
|
||||
options.sep,
|
||||
!! (node->flags & NODE_FLAGS_DISCONNECTED), options.sep,
|
||||
!! (node->flags & NODE_FLAGS_UNKNOWN), options.sep,
|
||||
!! (node->flags & NODE_FLAGS_BANNED), options.sep,
|
||||
!! (node->flags & NODE_FLAGS_PERMANENTLY_DISABLED),
|
||||
options.sep,
|
||||
@ -935,6 +998,7 @@ static void print_status(TALLOC_CTX *mem_ctx,
|
||||
static int control_status(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
|
||||
int argc, const char **argv)
|
||||
{
|
||||
struct ctdb_node_map *nodemap_in;
|
||||
struct ctdb_node_map *nodemap;
|
||||
struct ctdb_vnn_map *vnnmap;
|
||||
int recmode;
|
||||
@ -945,7 +1009,12 @@ static int control_status(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
|
||||
usage("status");
|
||||
}
|
||||
|
||||
nodemap = get_nodemap(ctdb, false);
|
||||
nodemap_in = get_nodemap(ctdb, false);
|
||||
if (nodemap_in == NULL) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
nodemap = get_nodemap_unknown(mem_ctx, ctdb, nodemap_in);
|
||||
if (nodemap == NULL) {
|
||||
return 1;
|
||||
}
|
||||
@ -5603,6 +5672,7 @@ static int control_nodestatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
|
||||
int argc, const char **argv)
|
||||
{
|
||||
const char *nodestring = NULL;
|
||||
struct ctdb_node_map *nodemap_in;
|
||||
struct ctdb_node_map *nodemap;
|
||||
unsigned int i;
|
||||
int ret;
|
||||
@ -5619,7 +5689,12 @@ static int control_nodestatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb,
|
||||
}
|
||||
}
|
||||
|
||||
if (! parse_nodestring(mem_ctx, ctdb, nodestring, &nodemap)) {
|
||||
if (! parse_nodestring(mem_ctx, ctdb, nodestring, &nodemap_in)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
nodemap = get_nodemap_unknown(mem_ctx, ctdb, nodemap_in);
|
||||
if (nodemap == NULL) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ EOF
|
||||
|
||||
nodestatus_X=""
|
||||
# Fields are:
|
||||
# Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
|
||||
# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
|
||||
get_nodestatus_X ()
|
||||
{
|
||||
# Result is cached in global variable nodestatus_X
|
||||
@ -100,11 +100,11 @@ filter_nodes ()
|
||||
# them, so the first to succeed will print the nodes.
|
||||
|
||||
# First try for a fully active and healthy node, so must not
|
||||
# be DISABLED, UNHEALTHY or INACTIVE (last covers
|
||||
# be UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers
|
||||
# DISCONNECTED, BANNED or STOPPED)
|
||||
awk -F '|' -v ns="$_ns" '
|
||||
BEGIN { ret = 255 }
|
||||
ns ~ "@" $2 "@" && $5 == 0 && $6 == 0 && $8 == 0 {
|
||||
ns ~ "@" $2 "@" && $4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 {
|
||||
print $1, $2 ; ret=0
|
||||
}
|
||||
END { exit ret }
|
||||
@ -115,7 +115,7 @@ EOF
|
||||
# DISABLED
|
||||
awk -F '|' -v ns="$_ns" '
|
||||
BEGIN { ret = 255 }
|
||||
ns ~ "@" $2 "@" && $5 == 0 && $8 == 0 {
|
||||
ns ~ "@" $2 "@" && $6 == 0 && $9 == 0 {
|
||||
print $1, $2 ; ret=0
|
||||
}
|
||||
END { exit ret }
|
||||
|
@ -32,7 +32,7 @@ EOF
|
||||
|
||||
nodestatus_X=""
|
||||
# Fields are:
|
||||
# Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
|
||||
# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode
|
||||
get_nodestatus_X ()
|
||||
{
|
||||
# Result is cached in global variable nodestatus_X
|
||||
@ -102,12 +102,12 @@ EOF
|
||||
# the first to succeed will select the leader node.
|
||||
|
||||
# First try for a fully active and healthy node, so must not be
|
||||
# DISABLED, UNHEALTHY or INACTIVE (last covers DISCONNECTED,
|
||||
# UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers DISCONNECTED,
|
||||
# BANNED or STOPPED)
|
||||
awk -F '|' -v ms="$_ms" \
|
||||
'BEGIN { ret = 2 }
|
||||
ms ~ "@" $2 "@" &&
|
||||
$5 == 0 && $6 == 0 && $8 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
$4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
END { exit ret }' <<EOF ||
|
||||
$nodestatus_X
|
||||
EOF
|
||||
@ -116,7 +116,7 @@ EOF
|
||||
awk -F '|' -v ms="$_ms" \
|
||||
'BEGIN { ret = 2 }
|
||||
ms ~ "@" $2 "@" &&
|
||||
$3 == 0 && $5 == 0 && $7 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
$3 == 0 && $6 == 0 && $8 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
END { exit ret }' <<EOF ||
|
||||
$nodestatus_X
|
||||
EOF
|
||||
@ -125,7 +125,7 @@ EOF
|
||||
awk -F '|' -v ms="$_ms" \
|
||||
'BEGIN { ret = 2 }
|
||||
ms ~ "@" $2 "@" &&
|
||||
$3 == 0 && $5 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
$3 == 0 && $6 == 0 { print $1, $2 ; ret=0 ; exit }
|
||||
END { exit ret }' <<EOF
|
||||
$nodestatus_X
|
||||
EOF
|
||||
|
Loading…
Reference in New Issue
Block a user