diff --git a/ctdb/common/ctdb_client.c b/ctdb/common/ctdb_client.c index ee195569183..cdaea470b31 100644 --- a/ctdb/common/ctdb_client.c +++ b/ctdb/common/ctdb_client.c @@ -812,7 +812,7 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint3 ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GETVNNMAP, 0, data, - ctdb, &outdata, &res, &timeout); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getvnnmap failed\n")); return -1; @@ -826,7 +826,7 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint3 /* get the recovery mode of a remote node */ -int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t *recmode) +int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode) { int ret; TDB_DATA data, outdata; @@ -835,7 +835,7 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_RECMODE, 0, data, - ctdb, &outdata, &res, NULL); + ctdb, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n")); return -1; @@ -849,7 +849,7 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t /* set the recovery mode of a remote node */ -int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t recmode) +int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode) { int ret; TDB_DATA data, outdata; @@ -861,7 +861,7 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_RECMODE, 0, data, - ctdb, &outdata, &res, NULL); + ctdb, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n")); return -1; @@ -873,7 +873,7 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t /* get a list of databases off a remote node */ -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap) +int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap) { int ret; TDB_DATA data, outdata; @@ -882,16 +882,12 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DBMAP, 0, data, - ctdb, &outdata, &res, NULL); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getdbmap failed\n")); return -1; } - if (*dbmap) { - talloc_free(*dbmap); - *dbmap = NULL; - } *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize); return 0; @@ -912,7 +908,7 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, ZERO_STRUCT(data); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_NODEMAP, 0, data, - ctdb, &outdata, &res, &timeout); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for getnodes failed\n")); return -1; @@ -926,7 +922,7 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb, /* set vnn map on a node */ -int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap) +int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap) { int ret; TDB_DATA data, outdata; @@ -937,7 +933,7 @@ int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SETVNNMAP, 0, data, - ctdb, &outdata, &res, NULL); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for setvnnmap failed\n")); return -1; @@ -1017,7 +1013,7 @@ int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid /* copy a tdb from one node to another node */ -int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx) +int ctdb_ctrl_copydb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx) { int ret; TDB_DATA indata, outdata; @@ -1031,7 +1027,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de ret = ctdb_control(ctdb, sourcenode, 0, CTDB_CONTROL_PULL_DB, 0, indata, - mem_ctx, &outdata, &res, NULL); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for pulldb failed\n")); return -1; @@ -1039,7 +1035,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PUSH_DB, 0, outdata, - mem_ctx, NULL, &res, NULL); + mem_ctx, NULL, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for pushdb failed\n")); return -1; @@ -1051,7 +1047,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de /* change dmaster for all keys in the database to the new value */ -int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster) +int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster) { int ret; TDB_DATA indata, outdata; @@ -1065,7 +1061,7 @@ int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CT ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DMASTER, 0, indata, - mem_ctx, &outdata, &res, NULL); + mem_ctx, &outdata, &res, &timeout); if (ret != 0 || res != 0) { DEBUG(0,(__location__ " ctdb_control for setdmaster failed\n")); return -1; @@ -1182,7 +1178,7 @@ int ctdb_ctrl_get_config(struct ctdb_context *ctdb) /* find the real path to a ltdb */ -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *mem_ctx, +int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path) { int ret; @@ -1192,9 +1188,9 @@ int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *me data.dptr = (uint8_t *)&dbid; data.dsize = sizeof(dbid); - ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, + ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GETDBPATH, 0, data, - mem_ctx, &data, &res, NULL); + mem_ctx, &data, &res, &timeout); if (ret != 0 || res != 0) { return -1; } @@ -1209,6 +1205,59 @@ int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *me return 0; } +/* + find the name of a db + */ +int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, + const char **name) +{ + int ret; + int32_t res; + TDB_DATA data; + + data.dptr = (uint8_t *)&dbid; + data.dsize = sizeof(dbid); + + ret = ctdb_control(ctdb, destnode, 0, + CTDB_CONTROL_GET_DBNAME, 0, data, + mem_ctx, &data, &res, &timeout); + if (ret != 0 || res != 0) { + return -1; + } + + (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize); + if ((*name) == NULL) { + return -1; + } + + talloc_free(data.dptr); + + return 0; +} + +/* + create a database + */ +int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, const char *name) +{ + int ret; + int32_t res; + TDB_DATA data; + + data.dptr = discard_const(name); + data.dsize = strlen(name)+1; + + ret = ctdb_control(ctdb, destnode, 0, + CTDB_CONTROL_DB_ATTACH, 0, data, + mem_ctx, &data, &res, &timeout); + + if (ret != 0 || res != 0) { + return -1; + } + + return 0; +} + /* get debug level on a node */ @@ -1342,7 +1391,7 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name) ctdb_db->db_id = *(uint32_t *)data.dptr; - ret = ctdb_ctrl_getdbpath(ctdb, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path); + ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path); if (ret != 0) { DEBUG(0,("Failed to get dbpath for database '%s'\n", name)); talloc_free(ctdb_db); diff --git a/ctdb/common/ctdb_control.c b/ctdb/common/ctdb_control.c index bd52d60453a..daf53ec04e6 100644 --- a/ctdb/common/ctdb_control.c +++ b/ctdb/common/ctdb_control.c @@ -356,6 +356,19 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, CHECK_CONTROL_DATA_SIZE(0); return ctdb->num_clients; + case CTDB_CONTROL_GET_DBNAME: { + uint32_t db_id; + struct ctdb_db_context *ctdb_db; + + CHECK_CONTROL_DATA_SIZE(sizeof(db_id)); + db_id = *(uint32_t *)indata.dptr; + ctdb_db = find_ctdb_db(ctdb, db_id); + if (ctdb_db == NULL) return -1; + outdata->dptr = discard_const(ctdb_db->db_name); + outdata->dsize = strlen(ctdb_db->db_name)+1; + return 0; + } + case CTDB_CONTROL_GETDBPATH: { uint32_t db_id; struct ctdb_db_context *ctdb_db; diff --git a/ctdb/direct/recoverd.c b/ctdb/direct/recoverd.c index 9203faf70ff..810c999ffc4 100644 --- a/ctdb/direct/recoverd.c +++ b/ctdb/direct/recoverd.c @@ -39,15 +39,270 @@ static void usage(void) exit(1); } -void timeout_func(struct event_context *ev, struct timed_event *te, +static void timeout_func(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data) { timed_out = 1; } -void do_recovery(struct ctdb_context *ctdb, struct event_context *ev) +static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev, + TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active, + struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap) { + int i, j, db, ret; + uint32_t generation; + struct ctdb_dbid_map *dbmap; + struct ctdb_dbid_map *remote_dbmap; + printf("we need to do recovery !!!\n"); + + /* pick a new generation number */ + generation = random(); + + + /* change the vnnmap on this node to use the new generation + number but not on any other nodes. + this guarantees that if we abort the recovery prematurely + for some reason (a node stops responding?) + that we can just return immediately and we will reenter + recovery shortly again. + I.e. we deliberately leave the cluster with an inconsistent + generation id to allow us to abort recovery at any stage and + just restart it from scratch. + */ + vnnmap->generation = generation; + ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap); + if (ret != 0) { + printf("Unable to set vnnmap for node %u\n", vnn); + return -1; + } + + + /* set recovery mode to active on all nodes */ + for (j=0; jnum; j++) { + /* dont change it for nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE); + if (ret != 0) { + printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn); + return -1; + } + } + + + /* get a list of all databases */ + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap); + if (ret != 0) { + printf("Unable to get dbids from node %u\n", vnn); + return -1; + } + + + /* verify that we have all database any other node has */ + for (j=0; jnum; j++) { + /* we dont need to ourself ourselves */ + if (nodemap->nodes[j].vnn == vnn) { + continue; + } + /* dont check nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap); + if (ret != 0) { + printf("Unable to get dbids from node %u\n", vnn); + return -1; + } + + /* step through all databases on the remote node */ + for (db=0; dbnum;db++) { + const char *name; + + for (i=0;inum;i++) { + if (remote_dbmap->dbids[db] == dbmap->dbids[i]) { + break; + } + } + /* we already have this db locally */ + if (i!=dbmap->num) { + continue; + } + /* ok so we need to create this database and + rebuild dbmap + */ + ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name); + if (ret != 0) { + printf("Unable to get dbname from node %u\n", nodemap->nodes[j].vnn); + return -1; + } + ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name); + if (ret != 0) { + printf("Unable to create local db:%s\n", name); + return -1; + } + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap); + if (ret != 0) { + printf("Unable to reread dbmap on node %u\n", vnn); + return -1; + } + } + } + + + + /* verify that all other nodes have all our databases */ + for (j=0; jnum; j++) { + /* we dont need to ourself ourselves */ + if (nodemap->nodes[j].vnn == vnn) { + continue; + } + /* dont check nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap); + if (ret != 0) { + printf("Unable to get dbids from node %u\n", vnn); + return -1; + } + + /* step through all local databases */ + for (db=0; dbnum;db++) { + const char *name; + + for (i=0;inum;i++) { + if (dbmap->dbids[db] == remote_dbmap->dbids[i]) { + break; + } + } + /* the remote node already have this database */ + if (i!=dbmap->num) { + continue; + } + /* ok so we need to create this database */ + ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), vnn, dbmap->dbids[db], mem_ctx, &name); + if (ret != 0) { + printf("Unable to get dbname from node %u\n", vnn); + return -1; + } + ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, name); + if (ret != 0) { + printf("Unable to create remote db:%s\n", name); + return -1; + } + } + } + + + /* pull all records from all other nodes across to this node + (this merges based on rsn) + */ + for (i=0;inum;i++) { + for (j=0; jnum; j++) { + /* we dont need to merge with ourselves */ + if (nodemap->nodes[j].vnn == vnn) { + continue; + } + /* dont merge from nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(2, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx); + if (ret != 0) { + printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn); + return -1; + } + } + } + + + /* update dmaster to point to this node for all databases/nodes */ + for (i=0;inum;i++) { + for (j=0; jnum; j++) { + /* dont repoint nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], vnn); + if (ret != 0) { + printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]); + return -1; + } + } + } + + + /* push all records out to the nodes again */ + for (i=0;inum;i++) { + for (j=0; jnum; j++) { + /* we dont need to push to ourselves */ + if (nodemap->nodes[j].vnn == vnn) { + continue; + } + /* dont push to nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx); + if (ret != 0) { + printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn); + return -1; + } + } + } + + + /* build a new vnn map */ + vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active); + if (vnnmap == NULL) { + DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n")); + exit(1); + } + vnnmap->generation = generation; + vnnmap->size = num_active; + for (i=j=0;inum;i++) { + if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) { + vnnmap->map[j++]=nodemap->nodes[i].vnn; + } + } + + + /* push the new vnn map out to all the nodes */ + for (j=0; jnum; j++) { + /* dont push to nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + + ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, vnnmap); + if (ret != 0) { + printf("Unable to set vnnmap for node %u\n", vnn); + return -1; + } + } + + + /* disable recovery mode */ + for (j=0; jnum; j++) { + /* dont push to nodes that are unavailable */ + if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) { + continue; + } + + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL); + if (ret != 0) { + printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn); + return -1; + } + } + + + return 0; } void recoverd(struct ctdb_context *ctdb, struct event_context *ev) @@ -122,7 +377,7 @@ again: */ if (remote_nodemap->num != nodemap->num) { printf("Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } @@ -133,7 +388,7 @@ again: if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn) || (remote_nodemap->nodes[i].flags != nodemap->nodes[i].flags)) { printf("Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } } @@ -152,7 +407,7 @@ again: */ if (vnnmap->size != num_active) { printf("The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } @@ -174,7 +429,7 @@ again: } if (i==vnnmap->size) { printf("Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } } @@ -200,14 +455,14 @@ again: /* verify the vnnmap generation is the same */ if (vnnmap->generation != remote_vnnmap->generation) { printf("Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } /* verify the vnnmap size is the same */ if (vnnmap->size != remote_vnnmap->size) { printf("Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } @@ -215,7 +470,7 @@ again: for (i=0;isize;i++) { if (remote_vnnmap->map[i] != vnnmap->map[i]) { printf("Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn); - do_recovery(ctdb, ev); + do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap); goto again; } } diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index 93e75af6737..33faa2d56bf 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -204,7 +204,9 @@ struct ctdb_vnn_map; int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap); -int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap); +int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, + struct timeval timeout, uint32_t destnode, + TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap); /* table that contains a list of all dbids on a node */ @@ -212,7 +214,9 @@ struct ctdb_dbid_map { uint32_t num; uint32_t dbids[1]; }; -int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap); +int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, + struct timeval timeout, uint32_t destnode, + TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap); /* table that contains a list of all nodes a ctdb knows about and their @@ -239,9 +243,14 @@ struct ctdb_key_list { TDB_DATA *data; }; int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx, struct ctdb_key_list *keys); -int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx); +int ctdb_ctrl_copydb(struct ctdb_context *ctdb, + struct timeval timeout, uint32_t sourcenode, + uint32_t destnode, uint32_t dbid, uint32_t lmaster, + TALLOC_CTX *mem_ctx); -int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path); +int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path); +int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **name); +int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, const char *name); int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid); @@ -255,7 +264,9 @@ int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint3 /* change dmaster for all keys in the database to the new value */ -int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster); +int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, + struct timeval timeout, uint32_t destnode, + TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster); /* delete all records from a tdb @@ -272,11 +283,11 @@ int ctdb_ctrl_write_record(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_ /* get the recovery mode of a remote node */ -int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t *recmode); +int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode); /* set the recovery mode of a remote node */ -int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t recmode); +int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode); uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb, struct timeval timeout, diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 518df2900b8..411496a03be 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -282,6 +282,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS, CTDB_CONTROL_TRAVERSE_DATA, CTDB_CONTROL_REGISTER_SRVID, CTDB_CONTROL_DEREGISTER_SRVID, + CTDB_CONTROL_GET_DBNAME, }; diff --git a/ctdb/tests/ctdb_test.c b/ctdb/tests/ctdb_test.c index 2e765368049..d00f619f57e 100644 --- a/ctdb/tests/ctdb_test.c +++ b/ctdb/tests/ctdb_test.c @@ -138,7 +138,7 @@ int main(int argc, const char *argv[]) ctdb_connect_wait(ctdb); /* find the full path to the database file */ - ctdb_ctrl_getdbpath(ctdb, ctdb_db->db_id, ctdb_db, &path); + ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &path); printf("path to database:[%s]\n",path); ZERO_STRUCT(call); diff --git a/ctdb/tests/recover.sh b/ctdb/tests/recover.sh index 79fe70f2a4b..9ed48c1ed2c 100755 --- a/ctdb/tests/recover.sh +++ b/ctdb/tests/recover.sh @@ -67,6 +67,8 @@ bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e done done +exit + echo echo echo "killing off node #0" diff --git a/ctdb/tools/ctdb_control.c b/ctdb/tools/ctdb_control.c index 1961572a530..90b09f4fa4a 100644 --- a/ctdb/tools/ctdb_control.c +++ b/ctdb/tools/ctdb_control.c @@ -284,7 +284,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("setting node %d to recovery mode\n",nodemap->nodes[j].vnn); - ret = ctdb_ctrl_setrecmode(ctdb, nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE); + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE); if (ret != 0) { printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn); return ret; @@ -293,7 +293,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg /* 4: get a list of all databases */ printf("\n4: getting list of databases to recover\n"); - ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap); + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, &dbmap); if (ret != 0) { printf("Unable to get dbids from node %u\n", vnn); return ret; @@ -301,7 +301,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg for (i=0;inum;i++) { const char *path; - ctdb_ctrl_getdbpath(ctdb, dbmap->dbids[i], ctdb, &path); + ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path); printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path); } @@ -322,7 +322,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("merging all records from node %d for database 0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]); - ret = ctdb_ctrl_copydb(ctdb, nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb); + ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb); if (ret != 0) { printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn); return ret; @@ -342,7 +342,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("setting dmaster to %d for node %d db 0x%08x\n",dmaster,nodemap->nodes[j].vnn,dbmap->dbids[i]); - ret = ctdb_ctrl_setdmaster(ctdb, nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], dmaster); + ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], dmaster); if (ret != 0) { printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]); return ret; @@ -365,7 +365,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("pushing all records to node %d for database 0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]); - ret = ctdb_ctrl_copydb(ctdb, vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb); + ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb); if (ret != 0) { printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn); return ret; @@ -404,7 +404,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("setting new vnn map on node %d\n",nodemap->nodes[j].vnn); - ret = ctdb_ctrl_setvnnmap(ctdb, nodemap->nodes[j].vnn, ctdb, vnnmap); + ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, vnnmap); if (ret != 0) { printf("Unable to set vnnmap for node %u\n", vnn); return ret; @@ -420,7 +420,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg } printf("changing recovery mode back to normal for node %d\n",nodemap->nodes[j].vnn); - ret = ctdb_ctrl_setrecmode(ctdb, nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL); + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL); if (ret != 0) { printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn); return ret; @@ -472,7 +472,7 @@ static int control_getrecmode(struct ctdb_context *ctdb, int argc, const char ** vnn = strtoul(argv[0], NULL, 0); - ret = ctdb_ctrl_getrecmode(ctdb, vnn, &recmode); + ret = ctdb_ctrl_getrecmode(ctdb, timeval_current_ofs(1, 0), vnn, &recmode); if (ret != 0) { printf("Unable to get recmode from node %u\n", vnn); return ret; @@ -498,7 +498,7 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char ** vnn = strtoul(argv[0], NULL, 0); recmode = strtoul(argv[0], NULL, 0); - ret = ctdb_ctrl_setrecmode(ctdb, vnn, recmode); + ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), vnn, recmode); if (ret != 0) { printf("Unable to set recmode on node %u\n", vnn); return ret; @@ -566,7 +566,7 @@ static int control_cpdb(struct ctdb_context *ctdb, int argc, const char **argv) dbid = strtoul(argv[2], NULL, 0); mem_ctx = talloc_new(ctdb); - ret = ctdb_ctrl_copydb(ctdb, fromvnn, tovnn, dbid, CTDB_LMASTER_ANY, mem_ctx); + ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), fromvnn, tovnn, dbid, CTDB_LMASTER_ANY, mem_ctx); if (ret != 0) { printf("Unable to copy db from node %u to node %u\n", fromvnn, tovnn); return ret; @@ -591,7 +591,7 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar vnn = strtoul(argv[0], NULL, 0); - ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap); + ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, &dbmap); if (ret != 0) { printf("Unable to get dbids from node %u\n", vnn); return ret; @@ -601,7 +601,7 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar for(i=0;inum;i++){ const char *path; - ctdb_ctrl_getdbpath(ctdb, dbmap->dbids[i], ctdb, &path); + ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path); printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path); } @@ -667,7 +667,7 @@ static int control_setvnnmap(struct ctdb_context *ctdb, int argc, const char **a vnnmap->map[i] = strtoul(argv[3+i], NULL, 0); } - ret = ctdb_ctrl_setvnnmap(ctdb, vnn, ctdb, vnnmap); + ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, vnnmap); if (ret != 0) { printf("Unable to set vnnmap for node %u\n", vnn); return ret; @@ -720,7 +720,7 @@ static int control_setdmaster(struct ctdb_context *ctdb, int argc, const char ** dbid = strtoul(argv[1], NULL, 0); dmaster = strtoul(argv[2], NULL, 0); - ret = ctdb_ctrl_setdmaster(ctdb, vnn, ctdb, dbid, dmaster); + ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, dbid, dmaster); if (ret != 0) { printf("Unable to set dmaster for node %u db:0x%08x\n", vnn, dbid); return ret;