tier/cli: print a warning instead of skipping nodes that are down

Problem: Skipping the status of nodes that are down confuses the user: the status may be shown as completed for all nodes, yet a subsequent detach commit will fail because a node is down. Fix: Display a warning message. Note: When the last node (as per the peer list) is down, the warning message can't be displayed, because the total number of peers participating in the transaction is taken to be the total count. Change-Id: Ib7afbd1b26df3378e4d537db06f41f5c105ad86e BUG: 1324439 Signed-off-by: hari gowtham <hgowtham@redhat.com> Reviewed-on: http://review.gluster.org/14347 Tested-by: hari gowtham <hari.gowtham005@gmail.com> Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
2016-05-16 10:55:17 +05:30 · 2016-05-16 10:55:17 +05:30 · d5e67cd2b2
commit d5e67cd2b2
parent 6837b2793b
3 changed files with 40 additions and 19 deletions
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@ -1531,7 +1531,8 @@ out:
 }

 int
-gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type)
+gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,
+                               gf_boolean_t is_tier)
 {
        int                ret          = -1;
        int                count        = 0;
@ -1550,6 +1551,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type)
        int                hrs          = 0;
        int                min          = 0;
        int                sec          = 0;
+        gf_boolean_t       down         = _gf_false;

        ret = dict_get_int32 (dict, "count", &count);
        if (ret) {
@ -1584,6 +1586,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type)
                        gf_log ("cli", GF_LOG_TRACE, "failed to get status");
                        gf_log ("cli", GF_LOG_ERROR, "node down and has failed"
                                " to set dict");
+                        down = _gf_true;
                        continue;
                        /* skip this node if value not available*/
                } else if (ret) {
@ -1672,6 +1675,11 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type)
                }
                GF_FREE(size_str);
        }
+        if (is_tier && down)
+                cli_out ("WARNING: glusterd might be down on one or more nodes."
+                         " Please check the nodes that are down using \'gluster"
+                         " peer status\' and start the glusterd on those nodes,"
+                         " else tier detach commit might fail!");
 out:
        return ret;
 }
@ -1689,6 +1697,7 @@ gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type)
        gf_defrag_status_t status_rcd   = GF_DEFRAG_STATUS_NOT_STARTED;
        char               *status_str  = NULL;
        char               *size_str    = NULL;
+        gf_boolean_t       down         = _gf_false;

        ret = dict_get_int32 (dict, "count", &count);
        if (ret) {
@ -1717,6 +1726,7 @@ gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type)
                                "failed to get status", count, i);
                        gf_log ("cli", GF_LOG_ERROR, "node down and has failed"
                                " to set dict");
+                        down = _gf_true;
                        continue;
                        /*skipping this node as value unavailable*/
                } else if (ret) {
@ -1755,8 +1765,11 @@ gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type)
                status_str = cli_vol_task_status_str[status_rcd];
                cli_out ("%-20s %-20"PRIu64" %-20"PRIu64" %-20s",
                         node_name, promoted, demoted, status_str);
-
        }
+        if (down)
+                cli_out ("WARNING: glusterd might be down on one or more nodes."
+                         " Please check the nodes that are down using \'gluster"
+                         " peer status\' and start the glusterd on those nodes.");
 out:
        return ret;
 }
@ -1914,9 +1927,14 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,

        if (cmd == GF_DEFRAG_CMD_STATUS_TIER)
                ret = gf_cli_print_tier_status (dict, GF_TASK_TYPE_REBALANCE);
+        else if (cmd == GF_DEFRAG_CMD_DETACH_STATUS)
+                ret = gf_cli_print_rebalance_status (dict,
+                                                     GF_TASK_TYPE_REBALANCE,
+                                                     _gf_true);
        else
                ret = gf_cli_print_rebalance_status (dict,
-                                                     GF_TASK_TYPE_REBALANCE);
+                                                     GF_TASK_TYPE_REBALANCE,
+                                                     _gf_false);

        if (ret)
                gf_log ("cli", GF_LOG_ERROR,
@ -2531,7 +2549,8 @@ xml_output:
                goto out;
        }

-        ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REMOVE_BRICK);
+        ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REMOVE_BRICK,
+                                             _gf_true);
        if (ret) {
                gf_log ("cli", GF_LOG_ERROR, "Failed to print remove-brick "
                        "rebalance status");
@ -2716,7 +2735,8 @@ xml_output:
                goto out;
        }

-        ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REMOVE_BRICK);
+        ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REMOVE_BRICK,
+                                             _gf_false);
        if (ret) {
                gf_log ("cli", GF_LOG_ERROR, "Failed to print remove-brick "
                        "rebalance status");
--- a/tests/basic/tier/new-tier-cmds.t
+++ b/tests/basic/tier/new-tier-cmds.t
@ -19,18 +19,6 @@ function create_dist_tier_vol () {
        TEST $CLI_1 volume attach-tier $V0 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3
 }

-function tier_detach_commit () {
-	$CLI_1 volume tier $V0 detach commit | grep "success" | wc -l
-}
-
-function tier_detach_status_node_down () {
-        $CLI_1 volume tier $V0 detach status | wc -l
-}
-
-function tier_status_node_down () {
-	$CLI_1 volume tier $V0 status | wc -l
-}
-
 cleanup;

 #setup cluster and test volume
@ -58,10 +46,10 @@ TEST $CLI_1 volume tier $V0 detach status
 TEST kill_node 2

 #check if we have the rest of the node available printed in the output of detach status
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" tier_detach_status_node_down
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status_node_down

 #check if we have the rest of the node available printed in the output of tier status
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "5" tier_status_node_down
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_status_node_down

 TEST $glusterd_2;

--- a/tests/tier.rc
+++ b/tests/tier.rc
@ -134,3 +134,16 @@ function rebalance_run_time () {

    echo $total;
 }
+
+function tier_detach_commit () {
+	$CLI_1 volume tier $V0 detach commit | grep "success" | wc -l
+}
+
+function tier_detach_status_node_down () {
+        $CLI_1 volume tier $V0 detach status | grep "WARNING" | wc -l
+}
+
+function tier_status_node_down () {
+	$CLI_1 volume tier $V0 status | grep "WARNING" | wc -l
+}
+