diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index b2f7efa4873..83b43c00d40 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -4,7 +4,7 @@ Summary: Clustered TDB Vendor: Samba Team Packager: Samba Team Name: ctdb -Version: 1.0.83 +Version: 1.0.84 Release: 1 Epoch: 0 License: GNU GPL version 3 @@ -131,6 +131,8 @@ fi %{_libdir}/pkgconfig/ctdb.pc %changelog +* Tue Jun 2 2009 : Version 1.0.84 + - Fix a bug in onnode that could not handle dead nodes * Tue Jun 2 2009 : Version 1.0.83 - Document how to remove a ndoe from a running cluster. - Hide all deleted nodes from ctdb output. diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index 46d9c632293..14da3b77e4b 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -309,7 +309,7 @@ int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata) CTDB_SRVID_SET_NODE_FLAGS, indata); /* if we have become banned, we should go into recovery mode */ - if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED)) { + if ((node->flags & NODE_FLAGS_BANNED) && !(c->old_flags & NODE_FLAGS_BANNED) && (node->pnn == ctdb->pnn)) { /* make sure we are frozen */ DEBUG(DEBUG_NOTICE,("This node has been banned - forcing freeze and recovery\n")); /* Reset the generation id to 1 to make us ignore any diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 21f7dc84899..5cd711d73f5 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -2021,7 +2021,7 @@ int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indat int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata) { struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr; - + int ret; /* verify the size of indata */ if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) { @@ -2039,7 +2039,14 @@ int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA inda return -1; } - return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]); + ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]); + + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n")); + return -1; + } + + return 0; } /* diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 118b309d754..589533b6d00 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -383,20 +383,15 @@ struct pnn_node { int pnn; }; -/* - show the PNN of the current node - discover the pnn by loading the nodes file and try to bind to all - addresses one at a time until the ip address is found. - */ -static int control_xpnn(struct ctdb_context *ctdb, int argc, const char **argv) +static struct pnn_node *read_nodes_file(TALLOC_CTX *mem_ctx) { - TALLOC_CTX *mem_ctx = talloc_new(NULL); const char *nodes_list; int nlines; char **lines; + int i, pnn; struct pnn_node *pnn_nodes = NULL; struct pnn_node *pnn_node; - int i, pnn; + struct pnn_node *tmp_node; /* read the nodes file */ nodes_list = getenv("CTDB_NODES"); @@ -405,8 +400,7 @@ static int control_xpnn(struct ctdb_context *ctdb, int argc, const char **argv) } lines = file_lines_load(nodes_list, &nlines, mem_ctx); if (lines == NULL) { - ctdb_set_error(ctdb, "Failed to load nodes list '%s'\n", nodes_list); - return -1; + return NULL; } while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) { nlines--; @@ -420,6 +414,7 @@ static int control_xpnn(struct ctdb_context *ctdb, int argc, const char **argv) node++; } if (*node == '#') { + pnn++; continue; } if (strcmp(node, "") == 0) { @@ -428,26 +423,60 @@ static int control_xpnn(struct ctdb_context *ctdb, int argc, const char **argv) pnn_node = talloc(mem_ctx, struct pnn_node); pnn_node->pnn = pnn++; pnn_node->addr = talloc_strdup(pnn_node, node); - CTDB_NO_MEMORY(ctdb, pnn_node->addr); pnn_node->next = pnn_nodes; pnn_nodes = pnn_node; } + /* swap them around so we return them in incrementing order */ + pnn_node = pnn_nodes; + pnn_nodes = NULL; + while (pnn_node) { + tmp_node = pnn_node; + pnn_node = pnn_node->next; + + tmp_node->next = pnn_nodes; + pnn_nodes = tmp_node; + } + + return pnn_nodes; +} + +/* + show the PNN of the current node + discover the pnn by loading the nodes file and try to bind to all + addresses one at a time until the ip address is found. + */ +static int control_xpnn(struct ctdb_context *ctdb, int argc, const char **argv) +{ + TALLOC_CTX *mem_ctx = talloc_new(NULL); + struct pnn_node *pnn_nodes; + struct pnn_node *pnn_node; + + pnn_nodes = read_nodes_file(mem_ctx); + if (pnn_nodes == NULL) { + DEBUG(DEBUG_ERR,("Failed to read nodes file\n")); + talloc_free(mem_ctx); + return -1; + } + for(pnn_node=pnn_nodes;pnn_node;pnn_node=pnn_node->next) { ctdb_sock_addr addr; if (parse_ip(pnn_node->addr, NULL, 63999, &addr) == 0) { - DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s' in nodes file %s\n", pnn_node->addr, nodes_list)); + DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s' in nodes file\n", pnn_node->addr)); + talloc_free(mem_ctx); return -1; } if (ctdb_sys_have_ip(&addr)) { printf("PNN:%d\n", pnn_node->pnn); + talloc_free(mem_ctx); return 0; } } printf("Failed to detect which PNN this node is\n"); + talloc_free(mem_ctx); return -1; } @@ -762,39 +791,6 @@ static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char * return 0; } -/* send a release ip to all nodes */ -static int control_send_release(struct ctdb_context *ctdb, uint32_t pnn, -ctdb_sock_addr *addr) -{ - int ret; - struct ctdb_public_ip pip; - TDB_DATA data; - struct ctdb_node_map *nodemap=NULL; - - ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n")); - return ret; - } - - /* send a moveip message to the recovery master */ - pip.pnn = pnn; - pip.addr = *addr; - data.dsize = sizeof(pip); - data.dptr = (unsigned char *)&pip; - - - /* send release ip to all nodes */ - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP, - list_of_active_nodes(ctdb, nodemap, ctdb, true), - TIMELIMIT(), false, data, - NULL, NULL, NULL) != 0) { - DEBUG(DEBUG_ERR, (__location__ " Unable to send 'ReleaseIP' to all nodes.\n")); - return -1; - } - - return 0; -} /* move/failover an ip address to a specific node @@ -803,49 +799,39 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv { uint32_t pnn; ctdb_sock_addr addr; - uint32_t value; struct ctdb_all_public_ips *ips; + struct ctdb_public_ip ip; + uint32_t *nodes; int i, ret; + TDB_DATA data; + struct ctdb_node_map *nodemap=NULL; + TALLOC_CTX *tmp_ctx = talloc_new(ctdb); if (argc < 2) { usage(); + talloc_free(tmp_ctx); + return -1; } if (parse_ip(argv[0], NULL, 0, &addr) == 0) { DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0])); + talloc_free(tmp_ctx); return -1; } if (sscanf(argv[1], "%u", &pnn) != 1) { DEBUG(DEBUG_ERR, ("Badly formed pnn\n")); + talloc_free(tmp_ctx); return -1; } - ret = ctdb_ctrl_get_tunable(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, "DeterministicIPs", &value); - if (ret == -1) { - DEBUG(DEBUG_ERR, ("Unable to get tunable variable 'DeterministicIPs' from local node\n")); - return -1; - } - if (value != 0) { - DEBUG(DEBUG_ERR, ("The tunable 'DeterministicIPs' is set. You can only move ip addresses when this feature is disabled\n")); - return -1; - } - - ret = ctdb_ctrl_get_tunable(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, "NoIPFailback", &value); - if (ret == -1) { - DEBUG(DEBUG_ERR, ("Unable to get tunable variable 'NoIPFailback' from local node\n")); - return -1; - } - if (value == 0) { - DEBUG(DEBUG_ERR, ("The tunable 'NoIPFailback' is NOT set. You can only move ip addresses when this feature is enabled\n")); - return -1; - } /* read the public ip list from the node */ ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), pnn, ctdb, &ips); if (ret != 0) { DEBUG(DEBUG_ERR, ("Unable to get public ip list from node %u\n", pnn)); + talloc_free(tmp_ctx); return -1; } @@ -857,20 +843,49 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv if (i==ips->num) { DEBUG(DEBUG_ERR, ("Node %u can not host ip address '%s'\n", pnn, ctdb_addr_to_str(&addr))); + talloc_free(tmp_ctx); return -1; } if (ips->ips[i].pnn == pnn) { DEBUG(DEBUG_ERR, ("Host %u is already hosting '%s'\n", pnn, ctdb_addr_to_str(&ips->ips[i].addr))); + talloc_free(tmp_ctx); return -1; } - ret = control_send_release(ctdb, pnn, &ips->ips[i].addr); + ip.pnn = pnn; + ip.addr = addr; + + data.dptr = (uint8_t *)&ip; + data.dsize = sizeof(ip); + + ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &nodemap); if (ret != 0) { - DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));; + DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); + talloc_free(tmp_ctx); + return ret; + } + + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + ret = ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP, + nodes, TIMELIMIT(), + false, data, + NULL, NULL, + NULL); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to release IP on nodes\n")); + talloc_free(tmp_ctx); return -1; } + ret = ctdb_ctrl_takeover_ip(ctdb, TIMELIMIT(), pnn, &ip); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to take over IP on node %d\n", pnn)); + talloc_free(tmp_ctx); + return -1; + } + + talloc_free(tmp_ctx); return 0; } @@ -1063,6 +1078,15 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) } + /* check if some other node is already serving this ip, if not, + * we will claim it + */ + for (i=0;inum;i++) { + if (ctdb_same_ip(&addr, &ips->ips[i].addr)) { + break; + } + } + len = offsetof(struct ctdb_control_ip_iface, iface) + strlen(argv[1]) + 1; pub = talloc_size(tmp_ctx, len); CTDB_NO_MEMORY(ctdb, pub); @@ -1079,22 +1103,21 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) return ret; } - - /* check if some other node is already serving this ip, if not, - * we will claim it - */ - for (i=0;inum;i++) { - if (ctdb_same_ip(&addr, &ips->ips[i].addr)) { - break; - } - } /* no one has this ip so we claim it */ if (i == ips->num) { - ret = control_send_release(ctdb, options.pnn, &addr); - } else { - ret = control_send_release(ctdb, ips->ips[i].pnn, &addr); + struct ctdb_public_ip ip; + + ip.pnn = options.pnn; + ip.addr = addr; + + ret = ctdb_ctrl_takeover_ip(ctdb, TIMELIMIT(), options.pnn, &ip); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to take over IP on node %d\n", options.pnn)); + return -1; + } } + if (ret != 0) { DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n")); return -1; @@ -1226,9 +1249,15 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv) if (ips->ips[i].pnn == options.pnn) { ret = find_other_host_for_public_ip(ctdb, &addr); if (ret != -1) { - ret = control_send_release(ctdb, ret, &addr); + struct ctdb_public_ip ip; + + ip.pnn = ret; + ip.addr = addr; + + ret = ctdb_ctrl_takeover_ip(ctdb, TIMELIMIT(), ret, &ip); if (ret != 0) { - DEBUG(DEBUG_ERR, ("Failed to migrate this ip to another node. Use moveip of recover to reassign this address to a node\n")); + DEBUG(DEBUG_ERR,("Failed to take over IP on node %d\n", options.pnn)); + return -1; } } } @@ -2787,23 +2816,59 @@ static int control_rddumpmemory(struct ctdb_context *ctdb, int argc, const char /* list all nodes in the cluster + if the daemon is running, we read the data from the daemon. + if the daemon is not running we parse the nodes file directly */ static int control_listnodes(struct ctdb_context *ctdb, int argc, const char **argv) { int i, ret; struct ctdb_node_map *nodemap=NULL; - ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); - return ret; - } - - for(i=0;inum;i++){ - if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) { - continue; + if (ctdb != NULL) { + ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); + return ret; } - printf("%s\n", ctdb_addr_to_str(&nodemap->nodes[i].addr)); + + for(i=0;inum;i++){ + if (nodemap->nodes[i].flags & NODE_FLAGS_DELETED) { + continue; + } + if (options.machinereadable){ + printf(":%d:%s:\n", nodemap->nodes[i].pnn, ctdb_addr_to_str(&nodemap->nodes[i].addr)); + } else { + printf("%s\n", ctdb_addr_to_str(&nodemap->nodes[i].addr)); + } + } + } else { + TALLOC_CTX *mem_ctx = talloc_new(NULL); + struct pnn_node *pnn_nodes; + struct pnn_node *pnn_node; + + pnn_nodes = read_nodes_file(mem_ctx); + if (pnn_nodes == NULL) { + DEBUG(DEBUG_ERR,("Failed to read nodes file\n")); + talloc_free(mem_ctx); + return -1; + } + + for(pnn_node=pnn_nodes;pnn_node;pnn_node=pnn_node->next) { + ctdb_sock_addr addr; + + if (parse_ip(pnn_node->addr, NULL, 63999, &addr) == 0) { + DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s' in nodes file\n", pnn_node->addr)); + talloc_free(mem_ctx); + return -1; + } + + if (options.machinereadable){ + printf(":%d:%s:\n", pnn_node->pnn, pnn_node->addr); + } else { + printf("%s\n", pnn_node->addr); + } + } + talloc_free(mem_ctx); } return 0; @@ -2885,8 +2950,8 @@ static const struct { { "pnn", control_pnn, true, false, "show the pnn of the currnet node" }, { "lvs", control_lvs, true, false, "show lvs configuration" }, { "lvsmaster", control_lvsmaster, true, false, "show which node is the lvs master" }, - { "disablemonitor", control_disable_monmode, true, false, "set monitoring mode to DISABLE" }, - { "enablemonitor", control_enable_monmode, true, false, "set monitoring mode to ACTIVE" }, + { "disablemonitor", control_disable_monmode,true, false, "set monitoring mode to DISABLE" }, + { "enablemonitor", control_enable_monmode, true, false, "set monitoring mode to ACTIVE" }, { "setdebug", control_setdebug, true, false, "set debug level", "" }, { "getdebug", control_getdebug, true, false, "get debug level" }, { "attach", control_attach, true, false, "attach to a database", "" }, @@ -2913,19 +2978,19 @@ static const struct { { "getsrvids", getsrvids, false, false, "get a list of all server ids"}, { "vacuum", ctdb_vacuum, false, false, "vacuum the databases of empty records", "[max_records]"}, { "repack", ctdb_repack, false, false, "repack all databases", "[max_freelist]"}, - { "listnodes", control_listnodes, false, false, "list all nodes in the cluster"}, - { "reloadnodes", control_reload_nodes_file, false, false, "reload the nodes file and restart the transport on all nodes"}, + { "listnodes", control_listnodes, false, true, "list all nodes in the cluster"}, + { "reloadnodes", control_reload_nodes_file, false, false, "reload the nodes file and restart the transport on all nodes"}, { "moveip", control_moveip, false, false, "move/failover an ip address to another node", " "}, { "addip", control_addip, true, false, "add a ip address to a node", " "}, { "delip", control_delip, false, false, "delete an ip address from a node", ""}, { "eventscript", control_eventscript, true, false, "run the eventscript with the given parameters on a node", ""}, { "backupdb", control_backupdb, false, false, "backup the database into a file.", " "}, - { "restoredb", control_restoredb, false, false, "restore the database from a file.", ""}, - { "recmaster", control_recmaster, false, false, "show the pnn for the recovery master."}, - { "setflags", control_setflags, false, false, "set flags for a node in the nodemap.", " "}, - { "scriptstatus", control_scriptstatus, false, false, "show the status of the monitoring scripts"}, - { "natgwlist", control_natgwlist, false, false, "show the nodes belonging to this natgw configuration"}, - { "xpnn", control_xpnn, true, true, "find the pnn of the local node without talking to the daemon (unreliable)" }, + { "restoredb", control_restoredb, false, false, "restore the database from a file.", ""}, + { "recmaster", control_recmaster, false, false, "show the pnn for the recovery master."}, + { "setflags", control_setflags, false, false, "set flags for a node in the nodemap.", " "}, + { "scriptstatus", control_scriptstatus, false, false, "show the status of the monitoring scripts"}, + { "natgwlist", control_natgwlist, false, false, "show the nodes belonging to this natgw configuration"}, + { "xpnn", control_xpnn, true, true, "find the pnn of the local node without talking to the daemon (unreliable)" }, }; /* @@ -3039,9 +3104,14 @@ int main(int argc, const char *argv[]) if (strcmp(control, ctdb_commands[i].name) == 0) { int j; + if (ctdb_commands[i].without_daemon == true) { + close(2); + } + + /* initialise ctdb */ + ctdb = ctdb_cmdline_client(ev); + if (ctdb_commands[i].without_daemon == false) { - /* initialise ctdb */ - ctdb = ctdb_cmdline_client(ev); if (ctdb == NULL) { DEBUG(DEBUG_ERR, ("Failed to init ctdb\n")); exit(1); diff --git a/ctdb/tools/onnode b/ctdb/tools/onnode index d7adbb05af3..aae77706534 100755 --- a/ctdb/tools/onnode +++ b/ctdb/tools/onnode @@ -95,7 +95,7 @@ echo_nth () shift $n local node="$1" - if [ -n "$node" ] ; then + if [ -n "$node" -a "$node" != "#DEAD" ] ; then echo $node else echo "${prog}: \"node ${n}\" does not exist" >&2 @@ -206,7 +206,7 @@ get_nodes () all_nodes="$CTDB_NODES_SOCKETS" else [ -f "$CTDB_NODES_FILE" ] || CTDB_NODES_FILE=/etc/ctdb/nodes - all_nodes=$(egrep '^[[:alnum:]]' $CTDB_NODES_FILE) + all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' $CTDB_NODES_FILE) fi local nodes="" @@ -215,7 +215,8 @@ get_nodes () [ $? != 0 ] && exit 1 # Required to catch exit in above subshell. case "$n" in all) - echo $all_nodes ;; + echo "${all_nodes//#DEAD/}" + ;; ok|healthy) get_nodes_with_status "$all_nodes" "healthy" || exit 1 ;; diff --git a/ctdb/utils/ping_pong/ping_pong.c b/ctdb/utils/ping_pong/ping_pong.c index 27ff0577ac1..6061ee7a8f3 100644 --- a/ctdb/utils/ping_pong/ping_pong.c +++ b/ctdb/utils/ping_pong/ping_pong.c @@ -37,7 +37,9 @@ #include #include #include +#ifndef _AIX #include +#endif #include static struct timeval tp1,tp2;