1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-28 01:58:17 +03:00

recovery daemon

this program is a client to the local ctdb daemon

every second it pulls the vnnmap and nodemap from every node that is
available and checks whether a recovery is required

a recovery is required if :
* NOT all nodes have an identical vnnmap and generation
* NOT all nodes have an identical nodemap
* there are active nodes that are NOT in the nodemap
* there are nodes in the nodemap that are NOT active

During recovery,  the recovery tool will also make sure that all nodes 
know about and have created all databases.

(This used to be ctdb commit 2f2650467bac7e8954de7c17cb34f46b0bdbcd26)
This commit is contained in:
Ronnie Sahlberg 2007-05-04 15:21:40 +10:00
parent b3c8d6ea7a
commit 7dfdab1b9d
8 changed files with 386 additions and 55 deletions

View File

@ -812,7 +812,7 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint3
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GETVNNMAP, 0, data,
ctdb, &outdata, &res, &timeout);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getvnnmap failed\n"));
return -1;
@ -826,7 +826,7 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint3
/*
get the recovery mode of a remote node
*/
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t *recmode)
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
{
int ret;
TDB_DATA data, outdata;
@ -835,7 +835,7 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMODE, 0, data,
ctdb, &outdata, &res, NULL);
ctdb, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
return -1;
@ -849,7 +849,7 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t
/*
set the recovery mode of a remote node
*/
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t recmode)
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
{
int ret;
TDB_DATA data, outdata;
@ -861,7 +861,7 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SET_RECMODE, 0, data,
ctdb, &outdata, &res, NULL);
ctdb, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
return -1;
@ -873,7 +873,7 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t
/*
get a list of databases off a remote node
*/
int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
{
int ret;
TDB_DATA data, outdata;
@ -882,16 +882,12 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_DBMAP, 0, data,
ctdb, &outdata, &res, NULL);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getdbmap failed\n"));
return -1;
}
if (*dbmap) {
talloc_free(*dbmap);
*dbmap = NULL;
}
*dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
return 0;
@ -912,7 +908,7 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_NODEMAP, 0, data,
ctdb, &outdata, &res, &timeout);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getnodes failed\n"));
return -1;
@ -926,7 +922,7 @@ int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
/*
set vnn map on a node
*/
int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
{
int ret;
TDB_DATA data, outdata;
@ -937,7 +933,7 @@ int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SETVNNMAP, 0, data,
ctdb, &outdata, &res, NULL);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for setvnnmap failed\n"));
return -1;
@ -1017,7 +1013,7 @@ int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid
/*
copy a tdb from one node to another node
*/
int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx)
int ctdb_ctrl_copydb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx)
{
int ret;
TDB_DATA indata, outdata;
@ -1031,7 +1027,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de
ret = ctdb_control(ctdb, sourcenode, 0,
CTDB_CONTROL_PULL_DB, 0, indata,
mem_ctx, &outdata, &res, NULL);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for pulldb failed\n"));
return -1;
@ -1039,7 +1035,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_PUSH_DB, 0, outdata,
mem_ctx, NULL, &res, NULL);
mem_ctx, NULL, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for pushdb failed\n"));
return -1;
@ -1051,7 +1047,7 @@ int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t de
/*
change dmaster for all keys in the database to the new value
*/
int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
{
int ret;
TDB_DATA indata, outdata;
@ -1065,7 +1061,7 @@ int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CT
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SET_DMASTER, 0, indata,
mem_ctx, &outdata, &res, NULL);
mem_ctx, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for setdmaster failed\n"));
return -1;
@ -1182,7 +1178,7 @@ int ctdb_ctrl_get_config(struct ctdb_context *ctdb)
/*
find the real path to a ltdb
*/
int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *mem_ctx,
int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
const char **path)
{
int ret;
@ -1192,9 +1188,9 @@ int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *me
data.dptr = (uint8_t *)&dbid;
data.dsize = sizeof(dbid);
ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GETDBPATH, 0, data,
mem_ctx, &data, &res, NULL);
mem_ctx, &data, &res, &timeout);
if (ret != 0 || res != 0) {
return -1;
}
@ -1209,6 +1205,59 @@ int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *me
return 0;
}
/*
  find the name of a db

  Sends CTDB_CONTROL_GET_DBNAME for 'dbid' to 'destnode' and stores a
  copy of the reply, allocated on mem_ctx, in *name.

  returns 0 on success, -1 if the control fails, the remote side reports
  an error, or the name cannot be duplicated.
 */
int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
			const char **name)
{
	int ret;
	int32_t res;
	TDB_DATA data;

	/* the request payload is just the database id; the same TDB_DATA
	   is reused to receive the reply */
	data.dptr = (uint8_t *)&dbid;
	data.dsize = sizeof(dbid);

	ret = ctdb_control(ctdb, destnode, 0,
			   CTDB_CONTROL_GET_DBNAME, 0, data,
			   mem_ctx, &data, &res, &timeout);
	if (ret != 0 || res != 0) {
		return -1;
	}

	(*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);

	/* the reply buffer is no longer needed whether or not the copy
	   succeeded, so release it before looking at the result */
	talloc_free(data.dptr);

	if ((*name) == NULL) {
		return -1;
	}

	return 0;
}
/*
  create a database

  Sends CTDB_CONTROL_DB_ATTACH with the nul terminated database name to
  'destnode'.

  returns 0 on success, -1 if 'name' is NULL, the control fails, or the
  remote side reports an error.
 */
int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, const char *name)
{
	int ret;
	int32_t res;
	TDB_DATA data;

	if (name == NULL) {
		return -1;
	}

	data.dptr = discard_const(name);
	data.dsize = strlen(name)+1;

	/* the reply payload is not used here, so do not ask for it; this
	   avoids leaving an unreferenced reply buffer behind */
	ret = ctdb_control(ctdb, destnode, 0,
			   CTDB_CONTROL_DB_ATTACH, 0, data,
			   mem_ctx, NULL, &res, &timeout);
	if (ret != 0 || res != 0) {
		return -1;
	}

	return 0;
}
/*
get debug level on a node
*/
@ -1342,7 +1391,7 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name)
ctdb_db->db_id = *(uint32_t *)data.dptr;
ret = ctdb_ctrl_getdbpath(ctdb, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
if (ret != 0) {
DEBUG(0,("Failed to get dbpath for database '%s'\n", name));
talloc_free(ctdb_db);

View File

@ -356,6 +356,19 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
CHECK_CONTROL_DATA_SIZE(0);
return ctdb->num_clients;
case CTDB_CONTROL_GET_DBNAME: {
uint32_t db_id;
struct ctdb_db_context *ctdb_db;
CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
db_id = *(uint32_t *)indata.dptr;
ctdb_db = find_ctdb_db(ctdb, db_id);
if (ctdb_db == NULL) return -1;
outdata->dptr = discard_const(ctdb_db->db_name);
outdata->dsize = strlen(ctdb_db->db_name)+1;
return 0;
}
case CTDB_CONTROL_GETDBPATH: {
uint32_t db_id;
struct ctdb_db_context *ctdb_db;

View File

@ -39,15 +39,270 @@ static void usage(void)
exit(1);
}
void timeout_func(struct event_context *ev, struct timed_event *te,
/*
  timed event callback: records that the timeout fired by raising the
  timed_out flag. None of the callback arguments are used.
 */
static void timeout_func(struct event_context *ev, struct timed_event *te,
	struct timeval t, void *private_data)
{
	(void)ev;
	(void)te;
	(void)t;
	(void)private_data;

	timed_out = 1;
}
void do_recovery(struct ctdb_context *ctdb, struct event_context *ev)
/*
  run one full recovery pass, driven from the node 'vnn'

  ctdb       - client context used for all controls
  ev         - event context (not used by this function)
  mem_ctx    - talloc context for everything allocated during the pass
  vnn        - the node the recovery is driven from
  num_active - size of the vnn map that is built at the end
               (assumes this equals the number of nodes flagged
               NODE_FLAGS_CONNECTED in nodemap - TODO confirm in caller)
  nodemap    - node map; only nodes flagged NODE_FLAGS_CONNECTED are touched
  vnnmap     - current vnn map; its generation is overwritten

  steps:
    1. pick a new generation and set it on node 'vnn' only
    2. set recovery mode active on all connected nodes
    3. make sure node 'vnn' has every database any other node has
    4. make sure every other node has every database node 'vnn' has
    5. pull all records from the other nodes to node 'vnn'
    6. set dmaster to 'vnn' for all databases on all connected nodes
    7. push all records back out to the other nodes
    8. build a new vnn map and push it to all connected nodes
    9. set recovery mode back to normal on all connected nodes

  returns 0 on success and -1 as soon as any control fails.
  exits the process if the new vnn map cannot be allocated.
 */
static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
	TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
	struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
{
	int i, j, db, ret;
	uint32_t generation;
	struct ctdb_dbid_map *dbmap = NULL;
	struct ctdb_dbid_map *remote_dbmap = NULL;

	printf("we need to do recovery !!!\n");

	/* pick a new generation number */
	generation = random();

	/* change the vnnmap on this node to use the new generation
	   number but not on any other nodes.
	   this guarantees that if we abort the recovery prematurely
	   for some reason (a node stops responding?)
	   that we can just return immediately and we will reenter
	   recovery shortly again.
	   I.e. we deliberately leave the cluster with an inconsistent
	   generation id to allow us to abort recovery at any stage and
	   just restart it from scratch.
	*/
	vnnmap->generation = generation;
	ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap);
	if (ret != 0) {
		printf("Unable to set vnnmap for node %u\n", vnn);
		return -1;
	}

	/* set recovery mode to active on all nodes */
	for (j=0; j<nodemap->num; j++) {
		/* dont change it for nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
		if (ret != 0) {
			printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
			return -1;
		}
	}

	/* get a list of all databases */
	ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
	if (ret != 0) {
		printf("Unable to get dbids from node %u\n", vnn);
		return -1;
	}

	/* verify that we have all database any other node has */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* report the remote node that failed, not ourselves */
			printf("Unable to get dbids from node %u\n", nodemap->nodes[j].vnn);
			return -1;
		}

		/* step through all databases on the remote node */
		for (db=0; db<remote_dbmap->num;db++) {
			const char *name;

			for (i=0;i<dbmap->num;i++) {
				if (remote_dbmap->dbids[db] == dbmap->dbids[i]) {
					break;
				}
			}
			/* we already have this db locally */
			if (i!=dbmap->num) {
				continue;
			}

			/* ok so we need to create this database and
			   rebuild dbmap
			*/
			ret = ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				printf("Unable to get dbname from node %u\n", nodemap->nodes[j].vnn);
				return -1;
			}
			ret = ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name);
			if (ret != 0) {
				printf("Unable to create local db:%s\n", name);
				return -1;
			}
			ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
			if (ret != 0) {
				printf("Unable to reread dbmap on node %u\n", vnn);
				return -1;
			}
		}
	}

	/* verify that all other nodes have all our databases */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* report the remote node that failed, not ourselves */
			printf("Unable to get dbids from node %u\n", nodemap->nodes[j].vnn);
			return -1;
		}

		/* step through all local databases */
		for (db=0; db<dbmap->num;db++) {
			const char *name;

			for (i=0;i<remote_dbmap->num;i++) {
				if (dbmap->dbids[db] == remote_dbmap->dbids[i]) {
					break;
				}
			}
			/* the remote node already have this database.
			   the search above runs over remote_dbmap, so the
			   "not found" index is remote_dbmap->num */
			if (i!=remote_dbmap->num) {
				continue;
			}

			/* ok so we need to create this database */
			ret = ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), vnn, dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				printf("Unable to get dbname from node %u\n", vnn);
				return -1;
			}
			ret = ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, name);
			if (ret != 0) {
				printf("Unable to create remote db:%s\n", name);
				return -1;
			}
		}
	}

	/* pull all records from all other nodes across to this node
	   (this merges based on rsn)
	*/
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* we dont need to merge with ourselves */
			if (nodemap->nodes[j].vnn == vnn) {
				continue;
			}
			/* dont merge from nodes that are unavailable */
			if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
				continue;
			}

			ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(2, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
			if (ret != 0) {
				printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn);
				return -1;
			}
		}
	}

	/* update dmaster to point to this node for all databases/nodes */
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* dont repoint nodes that are unavailable */
			if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
				continue;
			}

			/* allocate on mem_ctx like every other control in
			   this function, not on the long lived ctdb context */
			ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, dbmap->dbids[i], vnn);
			if (ret != 0) {
				printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
				return -1;
			}
		}
	}

	/* push all records out to the nodes again */
	for (i=0;i<dbmap->num;i++) {
		for (j=0; j<nodemap->num; j++) {
			/* we dont need to push to ourselves */
			if (nodemap->nodes[j].vnn == vnn) {
				continue;
			}
			/* dont push to nodes that are unavailable */
			if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
				continue;
			}

			ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
			if (ret != 0) {
				printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn);
				return -1;
			}
		}
	}

	/* build a new vnn map */
	vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active);
	if (vnnmap == NULL) {
		DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n"));
		exit(1);
	}
	vnnmap->generation = generation;
	vnnmap->size = num_active;
	for (i=j=0;i<nodemap->num;i++) {
		if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
			vnnmap->map[j++]=nodemap->nodes[i].vnn;
		}
	}

	/* push the new vnn map out to all the nodes */
	for (j=0; j<nodemap->num; j++) {
		/* dont push to nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, vnnmap);
		if (ret != 0) {
			/* report the node the push failed for */
			printf("Unable to set vnnmap for node %u\n", nodemap->nodes[j].vnn);
			return -1;
		}
	}

	/* disable recovery mode */
	for (j=0; j<nodemap->num; j++) {
		/* dont push to nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL);
		if (ret != 0) {
			printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
			return -1;
		}
	}

	return 0;
}
void recoverd(struct ctdb_context *ctdb, struct event_context *ev)
@ -122,7 +377,7 @@ again:
*/
if (remote_nodemap->num != nodemap->num) {
printf("Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -133,7 +388,7 @@ again:
if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn)
|| (remote_nodemap->nodes[i].flags != nodemap->nodes[i].flags)) {
printf("Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
}
@ -152,7 +407,7 @@ again:
*/
if (vnnmap->size != num_active) {
printf("The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -174,7 +429,7 @@ again:
}
if (i==vnnmap->size) {
printf("Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
}
@ -200,14 +455,14 @@ again:
/* verify the vnnmap generation is the same */
if (vnnmap->generation != remote_vnnmap->generation) {
printf("Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
/* verify the vnnmap size is the same */
if (vnnmap->size != remote_vnnmap->size) {
printf("Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -215,7 +470,7 @@ again:
for (i=0;i<vnnmap->size;i++) {
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
printf("Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn);
do_recovery(ctdb, ev);
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
}

View File

@ -204,7 +204,9 @@ struct ctdb_vnn_map;
int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap);
int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap);
int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap);
/* table that contains a list of all dbids on a node
*/
@ -212,7 +214,9 @@ struct ctdb_dbid_map {
uint32_t num;
uint32_t dbids[1];
};
int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap);
int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap);
/* table that contains a list of all nodes a ctdb knows about and their
@ -239,9 +243,14 @@ struct ctdb_key_list {
TDB_DATA *data;
};
int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx, struct ctdb_key_list *keys);
int ctdb_ctrl_copydb(struct ctdb_context *ctdb, uint32_t sourcenode, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx);
int ctdb_ctrl_copydb(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t sourcenode,
uint32_t destnode, uint32_t dbid, uint32_t lmaster,
TALLOC_CTX *mem_ctx);
int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path);
int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path);
int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **name);
int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, const char *name);
int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid);
@ -255,7 +264,9 @@ int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint3
/*
change dmaster for all keys in the database to the new value
*/
int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster);
int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster);
/*
delete all records from a tdb
@ -272,11 +283,11 @@ int ctdb_ctrl_write_record(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_
/*
get the recovery mode of a remote node
*/
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t *recmode);
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode);
/*
set the recovery mode of a remote node
*/
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, uint32_t destnode, uint32_t recmode);
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode);
uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
struct timeval timeout,

View File

@ -282,6 +282,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS,
CTDB_CONTROL_TRAVERSE_DATA,
CTDB_CONTROL_REGISTER_SRVID,
CTDB_CONTROL_DEREGISTER_SRVID,
CTDB_CONTROL_GET_DBNAME,
};

View File

@ -138,7 +138,7 @@ int main(int argc, const char *argv[])
ctdb_connect_wait(ctdb);
/* find the full path to the database file */
ctdb_ctrl_getdbpath(ctdb, ctdb_db->db_id, ctdb_db, &path);
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &path);
printf("path to database:[%s]\n",path);
ZERO_STRUCT(call);

View File

@ -67,6 +67,8 @@ bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e
done
done
exit
echo
echo
echo "killing off node #0"

View File

@ -284,7 +284,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("setting node %d to recovery mode\n",nodemap->nodes[j].vnn);
ret = ctdb_ctrl_setrecmode(ctdb, nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
if (ret != 0) {
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
return ret;
@ -293,7 +293,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
/* 4: get a list of all databases */
printf("\n4: getting list of databases to recover\n");
ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap);
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, &dbmap);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
return ret;
@ -301,7 +301,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
for (i=0;i<dbmap->num;i++) {
const char *path;
ctdb_ctrl_getdbpath(ctdb, dbmap->dbids[i], ctdb, &path);
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
}
@ -322,7 +322,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("merging all records from node %d for database 0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
ret = ctdb_ctrl_copydb(ctdb, nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb);
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn);
return ret;
@ -342,7 +342,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("setting dmaster to %d for node %d db 0x%08x\n",dmaster,nodemap->nodes[j].vnn,dbmap->dbids[i]);
ret = ctdb_ctrl_setdmaster(ctdb, nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], dmaster);
ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], dmaster);
if (ret != 0) {
printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
return ret;
@ -365,7 +365,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("pushing all records to node %d for database 0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
ret = ctdb_ctrl_copydb(ctdb, vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb);
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, ctdb);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn);
return ret;
@ -404,7 +404,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("setting new vnn map on node %d\n",nodemap->nodes[j].vnn);
ret = ctdb_ctrl_setvnnmap(ctdb, nodemap->nodes[j].vnn, ctdb, vnnmap);
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, vnnmap);
if (ret != 0) {
printf("Unable to set vnnmap for node %u\n", vnn);
return ret;
@ -420,7 +420,7 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
printf("changing recovery mode back to normal for node %d\n",nodemap->nodes[j].vnn);
ret = ctdb_ctrl_setrecmode(ctdb, nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL);
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL);
if (ret != 0) {
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
return ret;
@ -472,7 +472,7 @@ static int control_getrecmode(struct ctdb_context *ctdb, int argc, const char **
vnn = strtoul(argv[0], NULL, 0);
ret = ctdb_ctrl_getrecmode(ctdb, vnn, &recmode);
ret = ctdb_ctrl_getrecmode(ctdb, timeval_current_ofs(1, 0), vnn, &recmode);
if (ret != 0) {
printf("Unable to get recmode from node %u\n", vnn);
return ret;
@ -498,7 +498,7 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char **
vnn = strtoul(argv[0], NULL, 0);
recmode = strtoul(argv[0], NULL, 0);
ret = ctdb_ctrl_setrecmode(ctdb, vnn, recmode);
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), vnn, recmode);
if (ret != 0) {
printf("Unable to set recmode on node %u\n", vnn);
return ret;
@ -566,7 +566,7 @@ static int control_cpdb(struct ctdb_context *ctdb, int argc, const char **argv)
dbid = strtoul(argv[2], NULL, 0);
mem_ctx = talloc_new(ctdb);
ret = ctdb_ctrl_copydb(ctdb, fromvnn, tovnn, dbid, CTDB_LMASTER_ANY, mem_ctx);
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), fromvnn, tovnn, dbid, CTDB_LMASTER_ANY, mem_ctx);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", fromvnn, tovnn);
return ret;
@ -591,7 +591,7 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
vnn = strtoul(argv[0], NULL, 0);
ret = ctdb_ctrl_getdbmap(ctdb, vnn, ctdb, &dbmap);
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, &dbmap);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
return ret;
@ -601,7 +601,7 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
for(i=0;i<dbmap->num;i++){
const char *path;
ctdb_ctrl_getdbpath(ctdb, dbmap->dbids[i], ctdb, &path);
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
}
@ -667,7 +667,7 @@ static int control_setvnnmap(struct ctdb_context *ctdb, int argc, const char **a
vnnmap->map[i] = strtoul(argv[3+i], NULL, 0);
}
ret = ctdb_ctrl_setvnnmap(ctdb, vnn, ctdb, vnnmap);
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, vnnmap);
if (ret != 0) {
printf("Unable to set vnnmap for node %u\n", vnn);
return ret;
@ -720,7 +720,7 @@ static int control_setdmaster(struct ctdb_context *ctdb, int argc, const char **
dbid = strtoul(argv[1], NULL, 0);
dmaster = strtoul(argv[2], NULL, 0);
ret = ctdb_ctrl_setdmaster(ctdb, vnn, ctdb, dbid, dmaster);
ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), vnn, ctdb, dbid, dmaster);
if (ret != 0) {
printf("Unable to set dmaster for node %u db:0x%08x\n", vnn, dbid);
return ret;