From 794f125802969a6b99f2758f70d7c2318309d924 Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Tue, 26 Apr 2022 17:20:21 +1000 Subject: [PATCH] ctdb-tool: Add UNKNOWN pseudo state When a node is starting, CTDB reports remote nodes as unhealthy by default. This can be misleading. To hide this, report an "UNKNOWN" pseudo state when a remote node is not disconnected and the runstate is less than or equal to "FIRST_RECOVERY". Signed-off-by: Vinit Agnihotri Signed-off-by: Martin Schwenke Reviewed-by: Amitay Isaacs --- ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh | 8 +- ctdb/tests/UNIT/tool/ctdb.nodestatus.002.sh | 8 +- ctdb/tests/UNIT/tool/ctdb.nodestatus.003.sh | 8 +- ctdb/tests/UNIT/tool/ctdb.nodestatus.004.sh | 4 +- ctdb/tests/UNIT/tool/ctdb.nodestatus.005.sh | 4 +- ctdb/tests/UNIT/tool/ctdb.nodestatus.006.sh | 4 +- ctdb/tests/UNIT/tool/ctdb.status.001.sh | 8 +- ctdb/tests/UNIT/tool/ctdb.status.002.sh | 8 +- ctdb/tests/scripts/integration.bash | 16 ++-- ctdb/tools/ctdb.c | 83 ++++++++++++++++++++- ctdb/tools/ctdb_lvs | 8 +- ctdb/tools/ctdb_natgw | 10 +-- 12 files changed, 122 insertions(+), 47 deletions(-) diff --git a/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh b/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh index 2217afcc0b9..3c754e2a838 100755 --- a/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh +++ b/ctdb/tests/UNIT/tool/ctdb.nodestatus.001.sh @@ -25,9 +25,9 @@ EOF simple_test all required_result 0 <ev, + ctdb->client, + ctdb->cmd_pnn, + TIMEOUT(), + &runstate); + if (ret != 0 ) { + printf("Unable to get runstate"); + return NULL; + } + + nodemap = talloc_nodemap(mem_ctx, nodemap_in); + if (nodemap == NULL) { + printf("Unable to get nodemap"); + return NULL; + } + + nodemap->num = nodemap_in->num; + for (i=0; inum; i++) { + struct ctdb_node_and_flags *node_in = &nodemap_in->node[i]; + struct ctdb_node_and_flags *node = &nodemap->node[i]; + + *node = *node_in; + + if (node->flags & NODE_FLAGS_DELETED) { + continue; + } + + if ((runstate <= CTDB_RUNSTATE_FIRST_RECOVERY) && + !(node->flags & NODE_FLAGS_DISCONNECTED) && + (node->pnn != ctdb->cmd_pnn)) { + node->flags = NODE_FLAGS_UNKNOWN; + } + } + + return nodemap; +} + /* Compare IP address */ static bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2) { @@ -826,11 +887,12 @@ static void print_nodemap_machine(TALLOC_CTX *mem_ctx, struct ctdb_node_and_flags *node; unsigned int i; - printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", options.sep, "Node", options.sep, "IP", options.sep, "Disconnected", options.sep, + "Unknown", options.sep, "Banned", options.sep, "Disabled", options.sep, "Unhealthy", options.sep, @@ -845,12 +907,13 @@ static void print_nodemap_machine(TALLOC_CTX *mem_ctx, continue; } - printf("%s%u%s%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%c%s\n", + printf("%s%u%s%s%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%d%s%c%s\n", options.sep, node->pnn, options.sep, ctdb_sock_addr_to_string(mem_ctx, &node->addr, false), options.sep, !! (node->flags & NODE_FLAGS_DISCONNECTED), options.sep, + !! (node->flags & NODE_FLAGS_UNKNOWN), options.sep, !! (node->flags & NODE_FLAGS_BANNED), options.sep, !! (node->flags & NODE_FLAGS_PERMANENTLY_DISABLED), options.sep, @@ -935,6 +998,7 @@ static void print_status(TALLOC_CTX *mem_ctx, static int control_status(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb, int argc, const char **argv) { + struct ctdb_node_map *nodemap_in; struct ctdb_node_map *nodemap; struct ctdb_vnn_map *vnnmap; int recmode; @@ -945,7 +1009,12 @@ static int control_status(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb, usage("status"); } - nodemap = get_nodemap(ctdb, false); + nodemap_in = get_nodemap(ctdb, false); + if (nodemap_in == NULL) { + return 1; + } + + nodemap = get_nodemap_unknown(mem_ctx, ctdb, nodemap_in); if (nodemap == NULL) { return 1; } @@ -5603,6 +5672,7 @@ static int control_nodestatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb, int argc, const char **argv) { const char *nodestring = NULL; + struct ctdb_node_map *nodemap_in; struct ctdb_node_map *nodemap; unsigned int i; int ret; @@ -5619,7 +5689,12 @@ static int control_nodestatus(TALLOC_CTX *mem_ctx, struct ctdb_context *ctdb, } } - if (! parse_nodestring(mem_ctx, ctdb, nodestring, &nodemap)) { + if (! parse_nodestring(mem_ctx, ctdb, nodestring, &nodemap_in)) { + return 1; + } + + nodemap = get_nodemap_unknown(mem_ctx, ctdb, nodemap_in); + if (nodemap == NULL) { return 1; } diff --git a/ctdb/tools/ctdb_lvs b/ctdb/tools/ctdb_lvs index ee521ba2bc1..d0249b9d4f4 100755 --- a/ctdb/tools/ctdb_lvs +++ b/ctdb/tools/ctdb_lvs @@ -32,7 +32,7 @@ EOF nodestatus_X="" # Fields are: -# Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode +# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode get_nodestatus_X () { # Result is cached in global variable nodestatus_X @@ -100,11 +100,11 @@ filter_nodes () # them, so the first to succeed will print the nodes. # First try for a fully active and healthy node, so must not - # be DISABLED, UNHEALTHY or INACTIVE (last covers + # be UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers # DISCONNECTED, BANNED or STOPPED) awk -F '|' -v ns="$_ns" ' BEGIN { ret = 255 } - ns ~ "@" $2 "@" && $5 == 0 && $6 == 0 && $8 == 0 { + ns ~ "@" $2 "@" && $4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 { print $1, $2 ; ret=0 } END { exit ret } @@ -115,7 +115,7 @@ EOF # DISABLED awk -F '|' -v ns="$_ns" ' BEGIN { ret = 255 } - ns ~ "@" $2 "@" && $5 == 0 && $8 == 0 { + ns ~ "@" $2 "@" && $6 == 0 && $9 == 0 { print $1, $2 ; ret=0 } END { exit ret } diff --git a/ctdb/tools/ctdb_natgw b/ctdb/tools/ctdb_natgw index b37b7d34032..728cd9c9b32 100755 --- a/ctdb/tools/ctdb_natgw +++ b/ctdb/tools/ctdb_natgw @@ -32,7 +32,7 @@ EOF nodestatus_X="" # Fields are: -# Node|IP|Disconnected|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode +# Node|IP|Disconnected|Unknown|Banned|Disabled|Unhealthy|Stopped|Inactive|PartiallyOnline|ThisNode get_nodestatus_X () { # Result is cached in global variable nodestatus_X @@ -102,12 +102,12 @@ EOF # the first to succeed will select the leader node. # First try for a fully active and healthy node, so must not be - # DISABLED, UNHEALTHY or INACTIVE (last covers DISCONNECTED, + # UNKNOWN, DISABLED, UNHEALTHY or INACTIVE (last covers DISCONNECTED, # BANNED or STOPPED) awk -F '|' -v ms="$_ms" \ 'BEGIN { ret = 2 } ms ~ "@" $2 "@" && - $5 == 0 && $6 == 0 && $8 == 0 { print $1, $2 ; ret=0 ; exit } + $4 == 0 && $6 == 0 && $7 == 0 && $9 == 0 { print $1, $2 ; ret=0 ; exit } END { exit ret }' <