mirror of
https://github.com/samba-team/samba.git
synced 2025-01-11 05:18:09 +03:00
redo and update how we synchronize flags across the cluster.
this simplifies the code and should close a race condition between the local recovery daemon and a remote node when flags are changing. (This used to be ctdb commit 32d460b8469eb53145f04161a5d01166f9b5f09e)
This commit is contained in:
parent
bf67153402
commit
e54347fa4e
@ -639,27 +639,12 @@ static int pull_remote_database(struct ctdb_context *ctdb, struct ctdb_node_map
|
||||
/*
|
||||
update flags on all active nodes
|
||||
*/
|
||||
static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
{
|
||||
int i;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
int ret;
|
||||
|
||||
ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, nodemap->nodes[i].flags, ~nodemap->nodes[i].flags);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_our_flags_on_all_nodes(struct ctdb_context *ctdb, uint32_t pnn, struct ctdb_node_map *nodemap)
|
||||
static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn, uint32_t flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[pnn].pnn, nodemap->nodes[pnn].flags, ~nodemap->nodes[pnn].flags);
|
||||
if (ret != 0) {
|
||||
ret = ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), pnn, flags, ~flags);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
|
||||
return -1;
|
||||
}
|
||||
@ -1513,12 +1498,18 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
/*
|
||||
update all nodes to have the same flags that we have
|
||||
*/
|
||||
ret = update_flags_on_all_nodes(ctdb, nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes\n"));
|
||||
return -1;
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = update_flags_on_all_nodes(ctdb, nodemap, i, nodemap->nodes[i].flags);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to update flags on all nodes for node %d\n", i));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
DEBUG(DEBUG_NOTICE, (__location__ " Recovery - updated flags\n"));
|
||||
|
||||
/* disable recovery mode */
|
||||
@ -2271,6 +2262,51 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static void async_getnodemap_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata, void *callback_data)
|
||||
{
|
||||
struct ctdb_node_map **remote_nodemaps = callback_data;
|
||||
|
||||
if (node_pnn >= ctdb->num_nodes) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " pnn from invalid node\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
remote_nodemaps[node_pnn] = (struct ctdb_node_map *)talloc_steal(remote_nodemaps, outdata.dptr);
|
||||
|
||||
}
|
||||
|
||||
static int get_remote_nodemaps(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
|
||||
struct ctdb_node_map *nodemap,
|
||||
struct ctdb_node_map ***remote_nodemaps)
|
||||
{
|
||||
uint32_t *nodes;
|
||||
int i;
|
||||
|
||||
*remote_nodemaps = talloc_array(mem_ctx, struct ctdb_node_map *, nodemap->num);
|
||||
if (*remote_nodemaps == NULL) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " failed to allocate remote nodemap array\n"));
|
||||
return -1;
|
||||
}
|
||||
for(i=0; i<nodemap->num; i++) {
|
||||
(*remote_nodemaps)[i] = NULL;
|
||||
}
|
||||
|
||||
nodes = list_of_active_nodes(ctdb, nodemap, mem_ctx, true);
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_NODEMAP,
|
||||
nodes,
|
||||
CONTROL_TIMEOUT(), false, tdb_null,
|
||||
async_getnodemap_callback,
|
||||
NULL,
|
||||
*remote_nodemaps) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to pull all remote nodemaps\n"));
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
the main monitoring loop
|
||||
*/
|
||||
@ -2279,7 +2315,8 @@ static void monitor_cluster(struct ctdb_context *ctdb)
|
||||
uint32_t pnn;
|
||||
TALLOC_CTX *mem_ctx=NULL;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
struct ctdb_node_map *remote_nodemap=NULL;
|
||||
struct ctdb_node_map *recmaster_nodemap=NULL;
|
||||
struct ctdb_node_map **remote_nodemaps=NULL;
|
||||
struct ctdb_vnn_map *vnnmap=NULL;
|
||||
struct ctdb_vnn_map *remote_vnnmap=NULL;
|
||||
int32_t debug_level;
|
||||
@ -2484,7 +2521,7 @@ again:
|
||||
|
||||
/* grap the nodemap from the recovery master to check if it is banned */
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
|
||||
mem_ctx, &remote_nodemap);
|
||||
mem_ctx, &recmaster_nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from recovery master %u\n",
|
||||
nodemap->nodes[j].pnn));
|
||||
@ -2492,21 +2529,13 @@ again:
|
||||
}
|
||||
|
||||
|
||||
if (remote_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
if (recmaster_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
DEBUG(DEBUG_NOTICE, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn));
|
||||
force_election(rec, pnn, nodemap);
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* verify that we and the recmaster agrees on our flags */
|
||||
if (nodemap->nodes[pnn].flags != remote_nodemap->nodes[pnn].flags) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Recmaster disagrees on our flags flags:0x%x recmaster_flags:0x%x Broadcasting out flags.\n", nodemap->nodes[pnn].flags, remote_nodemap->nodes[pnn].flags));
|
||||
|
||||
update_our_flags_on_all_nodes(ctdb, pnn, nodemap);
|
||||
}
|
||||
|
||||
|
||||
/* verify that we have all ip addresses we should have and we dont
|
||||
* have addresses we shouldnt have.
|
||||
*/
|
||||
@ -2619,31 +2648,27 @@ again:
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* get the nodemap for all active remote nodes and verify
|
||||
they are the same as for this node
|
||||
|
||||
/* get the nodemap for all active remote nodes
|
||||
*/
|
||||
if (get_remote_nodemaps(ctdb, mem_ctx, nodemap, &remote_nodemaps) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to read remote nodemaps\n"));
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* verify that all other nodes have the same nodemap as we have
|
||||
*/
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[j].pnn == pnn) {
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
|
||||
mem_ctx, &remote_nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from remote node %u\n",
|
||||
nodemap->nodes[j].pnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* if the nodes disagree on how many nodes there are
|
||||
/* if the nodes disagree on how many nodes there are
|
||||
then this is a good reason to try recovery
|
||||
*/
|
||||
if (remote_nodemap->num != nodemap->num) {
|
||||
if (remote_nodemaps[j]->num != nodemap->num) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different node count. %u vs %u of the local node\n",
|
||||
nodemap->nodes[j].pnn, remote_nodemap->num, nodemap->num));
|
||||
nodemap->nodes[j].pnn, remote_nodemaps[j]->num, nodemap->num));
|
||||
do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap, nodemap->nodes[j].pnn);
|
||||
goto again;
|
||||
}
|
||||
@ -2652,25 +2677,44 @@ again:
|
||||
active, then that is also a good reason to do recovery
|
||||
*/
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (remote_nodemap->nodes[i].pnn != nodemap->nodes[i].pnn) {
|
||||
if (remote_nodemaps[j]->nodes[i].pnn != nodemap->nodes[i].pnn) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap pnn for %d (%u vs %u).\n",
|
||||
nodemap->nodes[j].pnn, i,
|
||||
remote_nodemap->nodes[i].pnn, nodemap->nodes[i].pnn));
|
||||
do_recovery(rec, mem_ctx, pnn, nodemap,
|
||||
vnnmap, nodemap->nodes[j].pnn);
|
||||
goto again;
|
||||
}
|
||||
if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
|
||||
(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
|
||||
DEBUG(DEBUG_WARNING, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
|
||||
nodemap->nodes[j].pnn, i,
|
||||
remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
|
||||
remote_nodemaps[j]->nodes[i].pnn, nodemap->nodes[i].pnn));
|
||||
do_recovery(rec, mem_ctx, pnn, nodemap,
|
||||
vnnmap, nodemap->nodes[j].pnn);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
/* verify the flags are consistent
|
||||
*/
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nodemap->nodes[i].flags != remote_nodemaps[j]->nodes[i].flags) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different flags for node %u. It has 0x%02x vs our 0x%02x\n",
|
||||
nodemap->nodes[j].pnn,
|
||||
nodemap->nodes[i].pnn,
|
||||
remote_nodemaps[j]->nodes[i].flags,
|
||||
nodemap->nodes[j].flags));
|
||||
if (i == j) {
|
||||
DEBUG(DEBUG_ERR,("Use flags 0x%02x from remote node %d for cluster update of its own flags\n", remote_nodemaps[j]->nodes[i].flags, j));
|
||||
update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, remote_nodemaps[j]->nodes[i].flags);
|
||||
do_recovery(rec, mem_ctx, pnn, nodemap,
|
||||
vnnmap, nodemap->nodes[j].pnn);
|
||||
goto again;
|
||||
} else {
|
||||
DEBUG(DEBUG_ERR,("Use flags 0x%02x from local recmaster node for cluster update of node %d flags\n", nodemap->nodes[i].flags, i));
|
||||
update_flags_on_all_nodes(ctdb, nodemap, nodemap->nodes[i].pnn, nodemap->nodes[i].flags);
|
||||
do_recovery(rec, mem_ctx, pnn, nodemap,
|
||||
vnnmap, nodemap->nodes[j].pnn);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -153,7 +153,6 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
|
||||
return;
|
||||
}
|
||||
|
||||
DEBUG(DEBUG_ERR,("create socket...\n"));
|
||||
tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
|
||||
set_nonblocking(tnode->fd);
|
||||
set_close_on_exec(tnode->fd);
|
||||
|
@ -2279,6 +2279,50 @@ static int control_restoredb(struct ctdb_context *ctdb, int argc, const char **a
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* set flags of a node in the nodemap
|
||||
*/
|
||||
static int control_setflags(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
int32_t status;
|
||||
int node;
|
||||
int flags;
|
||||
TDB_DATA data;
|
||||
struct ctdb_node_flag_change c;
|
||||
|
||||
if (argc != 2) {
|
||||
usage();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sscanf(argv[0], "%d", &node) != 1) {
|
||||
DEBUG(DEBUG_ERR, ("Badly formed node\n"));
|
||||
usage();
|
||||
return -1;
|
||||
}
|
||||
if (sscanf(argv[1], "0x%x", &flags) != 1) {
|
||||
DEBUG(DEBUG_ERR, ("Badly formed flags\n"));
|
||||
usage();
|
||||
return -1;
|
||||
}
|
||||
|
||||
c.pnn = node;
|
||||
c.old_flags = 0;
|
||||
c.new_flags = flags;
|
||||
|
||||
data.dsize = sizeof(c);
|
||||
data.dptr = (unsigned char *)&c;
|
||||
|
||||
ret = ctdb_control(ctdb, options.pnn, 0, CTDB_CONTROL_MODIFY_FLAGS, 0,
|
||||
data, NULL, NULL, &status, NULL, NULL);
|
||||
if (ret != 0 || status != 0) {
|
||||
DEBUG(DEBUG_ERR,("Failed to modify flags\n"));
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
dump memory usage
|
||||
*/
|
||||
@ -2483,6 +2527,7 @@ static const struct {
|
||||
{ "backupdb", control_backupdb, false, "backup the database into a file.", "<database> <file>"},
|
||||
{ "restoredb", control_restoredb, false, "restore the database from a file.", "<file>"},
|
||||
{ "recmaster", control_recmaster, false, "show the pnn for the recovery master."},
|
||||
{ "setflags", control_setflags, false, "set flags for a node in the nodemap.", "<node> <flags>"},
|
||||
};
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user