mirror of
https://github.com/samba-team/samba.git
synced 2025-03-27 22:50:26 +03:00
merged from ronnie
(This used to be ctdb commit 49aad9fb09ca2c787e6f82ba03cb229cc51844f0)
This commit is contained in:
commit
d98d8d4de3
@ -541,11 +541,12 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
|
||||
struct ctdb_context *ctdb;
|
||||
|
||||
ctdb = talloc_zero(ev, struct ctdb_context);
|
||||
ctdb->ev = ev;
|
||||
ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
|
||||
ctdb->upcalls = &ctdb_upcalls;
|
||||
ctdb->idr = idr_init(ctdb);
|
||||
ctdb->max_lacount = CTDB_DEFAULT_MAX_LACOUNT;
|
||||
ctdb->ev = ev;
|
||||
ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
|
||||
ctdb->recovery_master = 0;
|
||||
ctdb->upcalls = &ctdb_upcalls;
|
||||
ctdb->idr = idr_init(ctdb);
|
||||
ctdb->max_lacount = CTDB_DEFAULT_MAX_LACOUNT;
|
||||
ctdb->seqnum_frequency = CTDB_DEFAULT_SEQNUM_FREQUENCY;
|
||||
|
||||
return ctdb;
|
||||
|
@ -722,7 +722,7 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
|
||||
/* semi-async operation */
|
||||
timed_out = 0;
|
||||
if (timeout) {
|
||||
event_add_timed(ctdb->ev, mem_ctx, *timeout, timeout_func, &timed_out);
|
||||
event_add_timed(ctdb->ev, state, *timeout, timeout_func, &timed_out);
|
||||
}
|
||||
while ((state->state == CTDB_CALL_WAIT)
|
||||
&& (timed_out == 0) ){
|
||||
@ -732,7 +732,7 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
|
||||
talloc_free(state);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
if (outdata) {
|
||||
*outdata = state->outdata;
|
||||
outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
|
||||
@ -836,12 +836,12 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
|
||||
ret = ctdb_control(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_GET_RECMODE, 0, data,
|
||||
ctdb, &outdata, &res, &timeout);
|
||||
if (ret != 0 || res != 0) {
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
*recmode = ((uint32_t *)outdata.dptr)[0];
|
||||
*recmode = res;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -870,6 +870,54 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
get the recovery master of a remote node
|
||||
*/
|
||||
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA data, outdata;
|
||||
int32_t res;
|
||||
|
||||
ZERO_STRUCT(data);
|
||||
ret = ctdb_control(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_GET_RECMASTER, 0, data,
|
||||
ctdb, &outdata, &res, &timeout);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for getrecmaster failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
*recmaster = res;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
set the recovery master of a remote node
|
||||
*/
|
||||
int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA data, outdata;
|
||||
int32_t res;
|
||||
|
||||
ZERO_STRUCT(data);
|
||||
data.dsize = sizeof(uint32_t);
|
||||
data.dptr = (unsigned char *)&recmaster;
|
||||
|
||||
ret = ctdb_control(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_SET_RECMASTER, 0, data,
|
||||
ctdb, &outdata, &res, &timeout);
|
||||
if (ret != 0 || res != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
get a list of databases off a remote node
|
||||
*/
|
||||
@ -1569,3 +1617,26 @@ int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
|
||||
return ctdb_traverse(ctdb_db, dumpdb_fn, f);
|
||||
}
|
||||
|
||||
/*
|
||||
get the pid of a ctdb daemon
|
||||
*/
|
||||
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA data, outdata;
|
||||
int32_t res;
|
||||
|
||||
ZERO_STRUCT(data);
|
||||
ret = ctdb_control(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_GET_PID, 0, data,
|
||||
ctdb, &outdata, &res, &timeout);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for getpid failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
*pid = res;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -341,13 +341,23 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
}
|
||||
|
||||
case CTDB_CONTROL_GET_RECMODE: {
|
||||
outdata->dsize = sizeof(uint32_t);
|
||||
outdata->dptr = (unsigned char *)talloc_array(outdata, uint32_t, 1);
|
||||
*((uint32_t *)(&outdata->dptr[0])) = ctdb->recovery_mode;
|
||||
return ctdb->recovery_mode;
|
||||
}
|
||||
|
||||
case CTDB_CONTROL_SET_RECMASTER: {
|
||||
ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
case CTDB_CONTROL_GET_RECMASTER: {
|
||||
return ctdb->recovery_master;
|
||||
}
|
||||
|
||||
case CTDB_CONTROL_GET_PID: {
|
||||
return getpid();
|
||||
}
|
||||
|
||||
case CTDB_CONTROL_CONFIG: {
|
||||
CHECK_CONTROL_DATA_SIZE(0);
|
||||
ctdb->status.controls.get_config++;
|
||||
|
@ -45,38 +45,9 @@ static void timeout_func(struct event_context *ev, struct timed_event *te,
|
||||
timed_out = 1;
|
||||
}
|
||||
|
||||
static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
|
||||
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
|
||||
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
|
||||
{
|
||||
int i, j, db, ret;
|
||||
uint32_t generation;
|
||||
struct ctdb_dbid_map *dbmap;
|
||||
struct ctdb_dbid_map *remote_dbmap;
|
||||
|
||||
printf("we need to do recovery !!!\n");
|
||||
|
||||
/* pick a new generation number */
|
||||
generation = random();
|
||||
|
||||
|
||||
/* change the vnnmap on this node to use the new generation
|
||||
number but not on any other nodes.
|
||||
this guarantees that if we abort the recovery prematurely
|
||||
for some reason (a node stops responding?)
|
||||
that we can just return immediately and we will reenter
|
||||
recovery shortly again.
|
||||
I.e. we deliberately leave the cluster with an inconsistent
|
||||
generation id to allow us to abort recovery at any stage and
|
||||
just restart it from scratch.
|
||||
*/
|
||||
vnnmap->generation = generation;
|
||||
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to set vnnmap for node %u\n", vnn);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int j, ret;
|
||||
|
||||
/* set recovery mode to active on all nodes */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
@ -85,74 +56,41 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
|
||||
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, rec_mode);
|
||||
if (ret != 0) {
|
||||
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Unable to set recmode on node %u\n", nodemap->nodes[j].vnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get a list of all databases */
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get dbids from node %u\n", vnn);
|
||||
return -1;
|
||||
}
|
||||
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
|
||||
{
|
||||
int j, ret;
|
||||
|
||||
|
||||
/* verify that we have all database any other node has */
|
||||
/* set recovery master to vnn on all nodes */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* we dont need to ourself ourselves */
|
||||
if (nodemap->nodes[j].vnn == vnn) {
|
||||
continue;
|
||||
}
|
||||
/* dont check nodes that are unavailable */
|
||||
/* dont change it for nodes that are unavailable */
|
||||
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
|
||||
ret = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, vnn);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get dbids from node %u\n", vnn);
|
||||
DEBUG(0, (__location__ "Unable to set recmaster on node %u\n", nodemap->nodes[j].vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* step through all databases on the remote node */
|
||||
for (db=0; db<remote_dbmap->num;db++) {
|
||||
const char *name;
|
||||
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
if (remote_dbmap->dbids[db] == dbmap->dbids[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* we already have this db locally */
|
||||
if (i!=dbmap->num) {
|
||||
continue;
|
||||
}
|
||||
/* ok so we need to create this database and
|
||||
rebuild dbmap
|
||||
*/
|
||||
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get dbname from node %u\n", nodemap->nodes[j].vnn);
|
||||
return -1;
|
||||
}
|
||||
ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name);
|
||||
if (ret != 0) {
|
||||
printf("Unable to create local db:%s\n", name);
|
||||
return -1;
|
||||
}
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to reread dbmap on node %u\n", vnn);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int i, j, db, ret;
|
||||
struct ctdb_dbid_map *remote_dbmap;
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
@ -167,7 +105,7 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get dbids from node %u\n", vnn);
|
||||
DEBUG(0, (__location__ "Unable to get dbids from node %u\n", vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -175,31 +113,99 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
for (db=0; db<dbmap->num;db++) {
|
||||
const char *name;
|
||||
|
||||
|
||||
for (i=0;i<remote_dbmap->num;i++) {
|
||||
if (dbmap->dbids[db] == remote_dbmap->dbids[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* the remote node already have this database */
|
||||
if (i!=dbmap->num) {
|
||||
if (i!=remote_dbmap->num) {
|
||||
continue;
|
||||
}
|
||||
/* ok so we need to create this database */
|
||||
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), vnn, dbmap->dbids[db], mem_ctx, &name);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get dbname from node %u\n", vnn);
|
||||
DEBUG(0, (__location__ "Unable to get dbname from node %u\n", vnn));
|
||||
return -1;
|
||||
}
|
||||
ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, name);
|
||||
if (ret != 0) {
|
||||
printf("Unable to create remote db:%s\n", name);
|
||||
DEBUG(0, (__location__ "Unable to create remote db:%s\n", name));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* pull all records from all other nodes across to this node
|
||||
|
||||
/*
  make sure this node has every database that any other connected node has

  For each connected node other than vnn, its database map is fetched
  and compared with *dbmap.  A database id that is missing from *dbmap
  is created on node vnn under the name the remote node reports, and
  *dbmap is then re-read from node vnn so that later comparisons see it.

  Returns 0 on success, -1 as soon as any control fails.
 */
static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, db, ret;
	struct ctdb_dbid_map *remote_dbmap;

	/* verify that we have all database any other node has */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* name the node that failed to answer, not ourselves */
			DEBUG(0, (__location__ " Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}

		/* step through all databases on the remote node */
		for (db=0; db<remote_dbmap->num;db++) {
			const char *name;

			for (i=0;i<(*dbmap)->num;i++) {
				if (remote_dbmap->dbids[db] == (*dbmap)->dbids[i]) {
					break;
				}
			}
			/* we already have this db locally */
			if (i!=(*dbmap)->num) {
				continue;
			}

			/* ok so we need to create this database and
			   rebuild dbmap.  The result of every control
			   is stored in ret; before, the checks below
			   tested the stale value from getdbmap and a
			   failed name lookup went on to use name
			   uninitialised
			 */
			ret = ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to get dbname from node %u\n", nodemap->nodes[j].vnn));
				return -1;
			}
			ret = ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to create local db:%s\n", name));
				return -1;
			}
			ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, dbmap);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to reread dbmap on node %u\n", vnn));
				return -1;
			}
		}
	}

	return 0;
}
|
||||
|
||||
|
||||
static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int i, j, ret;
|
||||
|
||||
/* pull all records from all other nodes across onto this node
|
||||
(this merges based on rsn)
|
||||
*/
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
@ -214,12 +220,20 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
}
|
||||
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(2, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
|
||||
if (ret != 0) {
|
||||
printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn);
|
||||
DEBUG(0, (__location__ "Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int i, j, ret;
|
||||
|
||||
/* update dmaster to point to this node for all databases/nodes */
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
@ -230,12 +244,19 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
}
|
||||
ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], vnn);
|
||||
if (ret != 0) {
|
||||
printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
|
||||
DEBUG(0, (__location__ "Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int i, j, ret;
|
||||
|
||||
/* push all records out to the nodes again */
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
@ -250,27 +271,19 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
}
|
||||
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
|
||||
if (ret != 0) {
|
||||
printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* build a new vnn map */
|
||||
vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active);
|
||||
if (vnnmap == NULL) {
|
||||
DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n"));
|
||||
exit(1);
|
||||
}
|
||||
vnnmap->generation = generation;
|
||||
vnnmap->size = num_active;
|
||||
for (i=j=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
|
||||
vnnmap->map[j++]=nodemap->nodes[i].vnn;
|
||||
}
|
||||
}
|
||||
|
||||
static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_vnn_map *vnnmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int j, ret;
|
||||
|
||||
/* push the new vnn map out to all the nodes */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
@ -281,33 +294,269 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
|
||||
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, vnnmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to set vnnmap for node %u\n", vnn);
|
||||
DEBUG(0, (__location__ "Unable to set vnnmap for node %u\n", vnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* disable recovery mode */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* dont push to nodes that are unavailable */
|
||||
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL);
|
||||
if (ret != 0) {
|
||||
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void recoverd(struct ctdb_context *ctdb, struct event_context *ev)
|
||||
|
||||
static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
|
||||
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
|
||||
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
|
||||
{
|
||||
uint32_t vnn, num_active;
|
||||
int i, j, ret;
|
||||
uint32_t generation;
|
||||
struct ctdb_dbid_map *dbmap;
|
||||
|
||||
DEBUG(0, (__location__ "Recovery initiated\n"));
|
||||
|
||||
/* pick a new generation number */
|
||||
generation = random();
|
||||
|
||||
/* change the vnnmap on this node to use the new generation
|
||||
number but not on any other nodes.
|
||||
this guarantees that if we abort the recovery prematurely
|
||||
for some reason (a node stops responding?)
|
||||
that we can just return immediately and we will reenter
|
||||
recovery shortly again.
|
||||
I.e. we deliberately leave the cluster with an inconsistent
|
||||
generation id to allow us to abort recovery at any stage and
|
||||
just restart it from scratch.
|
||||
*/
|
||||
vnnmap->generation = generation;
|
||||
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to set vnnmap for node %u\n", vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* set recovery mode to active on all nodes */
|
||||
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
|
||||
if (ret!=0) {
|
||||
DEBUG(0, (__location__ "Unable to set recovery mode to active on cluster\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* get a list of all databases */
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to get dbids from node :%d\n", vnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to create missing remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* verify that we have all the databases any other node has */
|
||||
ret = create_missing_local_databases(ctdb, nodemap, vnn, &dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to create missing local databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to create missing remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* pull all remote databases onto the local node */
|
||||
ret = pull_all_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to pull remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* repoint all local and remote database records to the local
|
||||
node as being dmaster
|
||||
*/
|
||||
ret = update_dmaster_on_all_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to update dmaster on all databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* push all local databases to the remote nodes */
|
||||
ret = push_all_local_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to push local databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* build a new vnn map with all the currently active nodes */
|
||||
vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active);
|
||||
if (vnnmap == NULL) {
|
||||
DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n"));
|
||||
return -1;
|
||||
}
|
||||
vnnmap->generation = generation;
|
||||
vnnmap->size = num_active;
|
||||
for (i=j=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
|
||||
vnnmap->map[j++]=nodemap->nodes[i].vnn;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* update to the new vnnmap on all nodes */
|
||||
ret = update_vnnmap_on_all_nodes(ctdb, nodemap, vnn, vnnmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to update vnnmap on all nodes\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* update recmaster to point to us for all nodes */
|
||||
ret = set_recovery_master(ctdb, nodemap, vnn);
|
||||
if (ret!=0) {
|
||||
DEBUG(0, (__location__ "Unable to set recovery master\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* disable recovery mode */
|
||||
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
|
||||
if (ret!=0) {
|
||||
DEBUG(0, (__location__ "Unable to set recovery mode to normal on cluster\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(0, (__location__ "Recovery complete\n"));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
struct election_message {
|
||||
uint32_t vnn;
|
||||
};
|
||||
|
||||
static int send_election_request(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA election_data;
|
||||
struct election_message emsg;
|
||||
uint64_t srvid;
|
||||
|
||||
srvid = CTDB_SRVTYPE_RECOVERY;
|
||||
srvid <<= 32;
|
||||
|
||||
emsg.vnn = vnn;
|
||||
|
||||
election_data.dsize = sizeof(struct election_message);
|
||||
election_data.dptr = (unsigned char *)&emsg;
|
||||
|
||||
|
||||
/* first we assume we will win the election and set
|
||||
recoverymaster to be ourself on the current node
|
||||
*/
|
||||
ret = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, vnn);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "failed to send recmaster election request"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* send an election message to all active nodes */
|
||||
ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, srvid, election_data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
handler for recovery master elections
|
||||
*/
|
||||
static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
|
||||
TDB_DATA data, void *private_data)
|
||||
{
|
||||
int ret;
|
||||
struct election_message *em = (struct election_message *)data.dptr;
|
||||
TALLOC_CTX *mem_ctx;
|
||||
|
||||
mem_ctx = talloc_new(ctdb);
|
||||
|
||||
if (em->vnn==ctdb_get_vnn(ctdb)) {
|
||||
talloc_free(mem_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
/* someone called an election. check their election data
|
||||
and if we disagree and we would rather be the elected node,
|
||||
send a new election message to all other nodes
|
||||
*/
|
||||
/* for now we just check the vnn number and allow the lowest
|
||||
vnn number to become recovery master
|
||||
*/
|
||||
if (em->vnn > ctdb_get_vnn(ctdb)) {
|
||||
ret = send_election_request(ctdb, mem_ctx, ctdb_get_vnn(ctdb));
|
||||
if (ret!=0) {
|
||||
DEBUG(0, (__location__ "failed to initiate recmaster election"));
|
||||
}
|
||||
talloc_free(mem_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
/* ok, let that guy become recmaster then */
|
||||
ret = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), ctdb_get_vnn(ctdb), em->vnn);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "failed to send recmaster election request"));
|
||||
talloc_free(mem_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
talloc_free(mem_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
  call a recovery master election and wait for it to settle

  Sends an election request for node vnn, then runs the event loop
  until a one second timer fires so that responses from the other nodes
  get processed.  nodemap is currently unused.
 */
static void force_election(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn, struct ctdb_node_map *nodemap)
{
	int ret;

	ret = send_election_request(ctdb, mem_ctx, vnn);
	if (ret!=0) {
		DEBUG(0, (__location__ " failed to initiate recmaster election\n"));
		return;
	}

	/* wait for one second to collect all responses */
	timed_out = 0;
	if (event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, ctdb) == NULL) {
		/* without the timer nothing would ever set timed_out
		   and the loop below would never finish */
		DEBUG(0, (__location__ " failed to set up election timeout\n"));
		return;
	}
	while (!timed_out) {
		event_loop_once(ctdb->ev);
	}
}
|
||||
|
||||
|
||||
void monitor_cluster(struct ctdb_context *ctdb, struct event_context *ev)
|
||||
{
|
||||
uint32_t vnn, num_active, recmode, recmaster;
|
||||
TALLOC_CTX *mem_ctx=NULL;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
struct ctdb_node_map *remote_nodemap=NULL;
|
||||
@ -316,7 +565,6 @@ void recoverd(struct ctdb_context *ctdb, struct event_context *ev)
|
||||
int i, j, ret;
|
||||
|
||||
again:
|
||||
printf("check if we need to do recovery\n");
|
||||
if (mem_ctx) {
|
||||
talloc_free(mem_ctx);
|
||||
mem_ctx = NULL;
|
||||
@ -327,7 +575,6 @@ again:
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
/* we only check for recovery once every second */
|
||||
timed_out = 0;
|
||||
event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, ctdb);
|
||||
@ -339,13 +586,23 @@ again:
|
||||
/* get our vnn number */
|
||||
vnn = ctdb_get_vnn(ctdb);
|
||||
|
||||
|
||||
/* get the vnnmap */
|
||||
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to get vnnmap from node %u\n", vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* get number of nodes */
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &nodemap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get nodemap from node %u\n", vnn);
|
||||
DEBUG(0, (__location__ "Unable to get nodemap from node %u\n", vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* count how many active nodes there are */
|
||||
num_active = 0;
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
@ -355,6 +612,79 @@ again:
|
||||
}
|
||||
|
||||
|
||||
/* check which node is the recovery master */
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to get recmaster from node %u\n", vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* verify that the recmaster node is still active */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (nodemap->nodes[j].vnn==recmaster) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
|
||||
DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn));
|
||||
force_election(ctdb, mem_ctx, vnn, nodemap);
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* if we are not the recmaster then we do not need to check
|
||||
if recovery is needed
|
||||
*/
|
||||
if (vnn!=recmaster) {
|
||||
goto again;
|
||||
}
|
||||
|
||||
|
||||
/* verify that all active nodes agree that we are the recmaster */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[j].vnn == vnn) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ "Unable to get recmaster from node %u\n", vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (recmaster!=vnn) {
|
||||
DEBUG(0, ("Node %d does not agree we are the recmaster. Force reelection\n", nodemap->nodes[j].vnn));
|
||||
force_election(ctdb, mem_ctx, vnn, nodemap);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* verify that all active nodes are in normal mode
|
||||
and not in recovery mode
|
||||
*/
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_getrecmode(ctdb, timeval_current_ofs(1, 0), vnn, &recmode);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, ("Unable to get recmode from node %u\n", vnn));
|
||||
goto again;
|
||||
}
|
||||
if (recmode!=CTDB_RECOVERY_NORMAL) {
|
||||
DEBUG(0, (__location__ "Node:%d was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* get the nodemap for all active remote nodes and verify
|
||||
they are the same as for this node
|
||||
*/
|
||||
@ -368,7 +698,7 @@ again:
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_nodemap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get nodemap from remote node %u\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Unable to get nodemap from remote node %u\n", nodemap->nodes[j].vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
@ -376,7 +706,7 @@ again:
|
||||
then this is a good reason to try recovery
|
||||
*/
|
||||
if (remote_nodemap->num != nodemap->num) {
|
||||
printf("Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num);
|
||||
DEBUG(0, (__location__ "Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
@ -387,7 +717,7 @@ again:
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn)
|
||||
|| (remote_nodemap->nodes[i].flags != nodemap->nodes[i].flags)) {
|
||||
printf("Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
@ -395,18 +725,12 @@ again:
|
||||
|
||||
}
|
||||
|
||||
/* get the vnnmap */
|
||||
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get vnnmap from node %u\n", vnn);
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* there better be the same number of lmasters in the vnn map
|
||||
as there are active nodes or we'll have to do a recovery
|
||||
*/
|
||||
if (vnnmap->size != num_active) {
|
||||
printf("The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active);
|
||||
DEBUG(0, (__location__ "The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
@ -428,7 +752,7 @@ again:
|
||||
}
|
||||
}
|
||||
if (i==vnnmap->size) {
|
||||
printf("Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
@ -448,20 +772,20 @@ again:
|
||||
|
||||
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_vnnmap);
|
||||
if (ret != 0) {
|
||||
printf("Unable to get vnnmap from remote node %u\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Unable to get vnnmap from remote node %u\n", nodemap->nodes[j].vnn));
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* verify the vnnmap generation is the same */
|
||||
if (vnnmap->generation != remote_vnnmap->generation) {
|
||||
printf("Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation);
|
||||
DEBUG(0, (__location__ "Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
|
||||
/* verify the vnnmap size is the same */
|
||||
if (vnnmap->size != remote_vnnmap->size) {
|
||||
printf("Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size);
|
||||
DEBUG(0, (__location__ "Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
@ -469,14 +793,13 @@ again:
|
||||
/* verify the vnnmap is the same */
|
||||
for (i=0;i<vnnmap->size;i++) {
|
||||
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
|
||||
printf("Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn);
|
||||
DEBUG(0, (__location__ "Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn));
|
||||
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
printf("no we did not need to do recovery\n");
|
||||
goto again;
|
||||
|
||||
}
|
||||
@ -498,6 +821,7 @@ int main(int argc, const char *argv[])
|
||||
int ret;
|
||||
poptContext pc;
|
||||
struct event_context *ev;
|
||||
uint64_t srvid;
|
||||
|
||||
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
|
||||
|
||||
@ -528,12 +852,18 @@ int main(int argc, const char *argv[])
|
||||
/* initialise ctdb */
|
||||
ctdb = ctdb_cmdline_client(ev);
|
||||
if (ctdb == NULL) {
|
||||
printf("Failed to init ctdb\n");
|
||||
DEBUG(0, (__location__ "Failed to init ctdb\n"));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
recoverd(ctdb, ev);
|
||||
/* register a message port for recovery elections */
|
||||
srvid = CTDB_SRVTYPE_RECOVERY;
|
||||
srvid <<= 32;
|
||||
ctdb_set_message_handler(ctdb, srvid, election_handler, NULL);
|
||||
|
||||
|
||||
monitor_cluster(ctdb, ev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -57,6 +57,10 @@ struct ctdb_call_info {
|
||||
a message handler ID meaning "give me all messages"
|
||||
*/
|
||||
#define CTDB_SRVID_ALL (~(uint64_t)0)
|
||||
/*
|
||||
srvid type : RECOVERY
|
||||
*/
|
||||
#define CTDB_SRVTYPE_RECOVERY 0x64766372
|
||||
|
||||
struct event_context;
|
||||
|
||||
@ -289,6 +293,15 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
|
||||
*/
|
||||
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode);
|
||||
|
||||
/*
|
||||
get the recovery master of a remote node
|
||||
*/
|
||||
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
|
||||
/*
|
||||
set the recovery master of a remote node
|
||||
*/
|
||||
int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster);
|
||||
|
||||
uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
|
||||
struct timeval timeout,
|
||||
TALLOC_CTX *mem_ctx,
|
||||
@ -303,4 +316,9 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
|
||||
|
||||
int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f);
|
||||
|
||||
/*
|
||||
get the pid of a ctdb daemon
|
||||
*/
|
||||
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid);
|
||||
|
||||
#endif
|
||||
|
@ -234,6 +234,7 @@ struct ctdb_context {
|
||||
struct ctdb_vnn_map *vnn_map;
|
||||
uint32_t num_clients;
|
||||
uint32_t seqnum_frequency;
|
||||
uint32_t recovery_master;
|
||||
};
|
||||
|
||||
struct ctdb_db_context {
|
||||
@ -312,6 +313,9 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS,
|
||||
CTDB_CONTROL_UPDATE_SEQNUM,
|
||||
CTDB_CONTROL_SET_SEQNUM_FREQUENCY,
|
||||
CTDB_CONTROL_DUMP_MEMORY,
|
||||
CTDB_CONTROL_GET_PID,
|
||||
CTDB_CONTROL_GET_RECMASTER,
|
||||
CTDB_CONTROL_SET_RECMASTER,
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,23 +1,27 @@
|
||||
#!/bin/sh
|
||||
|
||||
killall -q ctdbd
|
||||
killall -q recoverd
|
||||
|
||||
echo "Starting 4 ctdb daemons"
|
||||
bin/ctdbd --nlist direct/4nodes.txt
|
||||
bin/ctdbd --nlist direct/4nodes.txt
|
||||
bin/ctdbd --nlist direct/4nodes.txt
|
||||
bin/ctdbd --nlist direct/4nodes.txt
|
||||
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.2:9001 --socket=/tmp/ctdb.socket.127.0.0.2
|
||||
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.3:9001 --socket=/tmp/ctdb.socket.127.0.0.3
|
||||
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.4:9001 --socket=/tmp/ctdb.socket.127.0.0.4
|
||||
echo "Starting one recovery daemon on node 0"
|
||||
bin/recoverd --socket=/tmp/ctdb.socket >/dev/null 2>/dev/null &
|
||||
|
||||
echo
|
||||
echo "Attaching to some databases"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket attach test1.tdb || exit 1
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket attach test2.tdb || exit 1
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket attach test3.tdb || exit 1
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket attach test4.tdb || exit 1
|
||||
bin/ctdb_control attach test1.tdb || exit 1
|
||||
bin/ctdb_control attach test2.tdb || exit 1
|
||||
bin/ctdb_control attach test3.tdb || exit 1
|
||||
bin/ctdb_control attach test4.tdb || exit 1
|
||||
|
||||
echo "Clearing all databases to make sure they are all empty"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
|
||||
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
|
||||
seq 0 3 | while read NODE; do
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket cleardb $NODE $DB
|
||||
bin/ctdb_control cleardb $NODE $DB
|
||||
done
|
||||
done
|
||||
|
||||
@ -26,60 +30,60 @@ echo
|
||||
echo
|
||||
echo "Printing all databases on all nodes. they should all be empty"
|
||||
echo "============================================================="
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
|
||||
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^.*name://" -e "s/ .*$//" | while read DBNAME; do
|
||||
seq 0 3 | while read NODE; do
|
||||
echo "Content of DB:$DB NODE:$NODE :"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket catdb $NODE $DB
|
||||
echo "Content of DBNAME:$DBNAME NODE:$NODE :"
|
||||
bin/ctdb_control catdb $DBNAME $NODE
|
||||
done
|
||||
done
|
||||
|
||||
|
||||
echo
|
||||
echo
|
||||
echo "Populating the databases"
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 0 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 0 0x220c2a7b 1
|
||||
./bin/ctdb_control writerecord 0 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control setdmaster 0 0x220c2a7b 1
|
||||
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 1 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 1 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 1 0x220c2a7b 2
|
||||
./bin/ctdb_control writerecord 1 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 1 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control setdmaster 1 0x220c2a7b 2
|
||||
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 2 0x220c2a7b 3
|
||||
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control setdmaster 2 0x220c2a7b 3
|
||||
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 3 0x220c2a7b 3
|
||||
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
|
||||
./bin/ctdb_control setdmaster 3 0x220c2a7b 3
|
||||
|
||||
|
||||
echo
|
||||
echo
|
||||
echo "Printing all databases on all nodes. there should be a record there"
|
||||
echo "============================================================="
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
|
||||
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^.*name://" -e "s/ .*$//" | while read DBNAME; do
|
||||
seq 0 3 | while read NODE; do
|
||||
echo "Content of DB:$DB NODE:$NODE :"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket catdb $NODE $DB
|
||||
echo "Content of DBNAME:$DBNAME NODE:$NODE :"
|
||||
bin/ctdb_control catdb $DBNAME $NODE
|
||||
done
|
||||
done
|
||||
|
||||
echo
|
||||
echo
|
||||
echo "killing off node #0"
|
||||
echo "killing off node #2"
|
||||
echo "==================="
|
||||
CTDBPID=`ps aux | grep ctdbd | grep -v grep | head -1 | sed -e "s/^[^ ]* *//" -e "s/ .*$//"`
|
||||
CTDBPID=`./bin/ctdb_control getpid 2 | sed -e "s/Pid://"`
|
||||
kill $CTDBPID
|
||||
sleep 1
|
||||
|
||||
|
||||
echo
|
||||
echo
|
||||
echo "Recovery the cluster"
|
||||
echo "===================="
|
||||
./bin/ctdb_control --socket=/tmp/ctdb.socket recover 2 0x220c2a7b
|
||||
echo "wait 3 seconds to let the recovery daemon do its job"
|
||||
echo "===================================================="
|
||||
sleep 3
|
||||
|
||||
echo
|
||||
echo
|
||||
@ -87,15 +91,19 @@ echo "Printing all databases on all nodes."
|
||||
echo "The databases should be the same now on all nodes"
|
||||
echo "and the record will have been migrated to node 0"
|
||||
echo "================================================="
|
||||
echo "Node 0:"
|
||||
bin/ctdb_control catdb test4.tdb 0
|
||||
echo "Node 1:"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 1 0x220c2a7b
|
||||
echo "Node 2:"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 2 0x220c2a7b
|
||||
bin/ctdb_control catdb test4.tdb 1
|
||||
echo "Node 3:"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 3 0x220c2a7b
|
||||
bin/ctdb_control catdb test4.tdb 3
|
||||
echo "nodemap:"
|
||||
bin/ctdb_control --socket=/tmp/ctdb.socket getnodemap 3
|
||||
bin/ctdb_control getnodemap 0
|
||||
|
||||
echo
|
||||
echo
|
||||
echo "Traverse the cluster and dump the database"
|
||||
bin/ctdb_control catdb test4.tdb
|
||||
|
||||
|
||||
#leave the ctdb daemons running so one can look at the box in more detail
|
||||
|
@ -46,15 +46,19 @@ static void usage(void)
|
||||
" getdbmap <vnn> lists databases on a node\n"
|
||||
" getnodemap <vnn> lists nodes known to a ctdb daemon\n"
|
||||
" createdb <vnn> <dbname> create a database\n"
|
||||
" catdb <dbname> lists all keys/data in a db\n"
|
||||
" catdb <dbname> [vnn] lists all keys/data in a db\n"
|
||||
" cpdb <fromvnn> <tovnn> <dbid> lists all keys in a remote tdb\n"
|
||||
" setdmaster <vnn> <dbid> <dmaster> sets new dmaster for all records in the database\n"
|
||||
" cleardb <vnn> <dbid> deletes all records in a db\n"
|
||||
" getrecmode <vnn> get recovery mode\n"
|
||||
" setrecmode <vnn> <mode> set recovery mode\n"
|
||||
" getrecmaster <vnn> get recovery master\n"
|
||||
" setrecmaster <vnn> <master_vnn> set recovery master\n"
|
||||
" writerecord <vnn> <dbid> <key> <data>\n"
|
||||
" recover <vnn> recover the cluster\n"
|
||||
" attach <dbname> attach a database\n");
|
||||
" attach <dbname> attach a database\n"
|
||||
" getpid <vnn> get the pid of a ctdb daemon\n"
|
||||
);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -345,9 +349,11 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
|
||||
}
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
const char *path;
|
||||
const char *name;
|
||||
|
||||
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
|
||||
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
|
||||
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &name);
|
||||
printf("dbid:0x%08x name:%s path:%s\n", dbmap->dbids[i], name, path);
|
||||
}
|
||||
|
||||
/* 5: pull all records from all other nodes across to this node
|
||||
@ -502,6 +508,31 @@ static int control_getvnnmap(struct ctdb_context *ctdb, int argc, const char **a
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  display pid of a ctdb daemon

  argv[0] is the vnn of the node to query (parsed with strtoul, base
  auto-detected). Prints "Pid:<pid>" on success.

  Returns 0 on success, or the non-zero result of ctdb_ctrl_getpid()
  after printing an error message. With too few arguments usage() is
  called, which prints the help text and exits.
 */
static int control_getpid(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t vnn, pid;
	int ret;

	if (argc < 1) {
		usage();
	}

	vnn = strtoul(argv[0], NULL, 0);

	/* NOTE(review): timeval_current_ofs(1, 0) is presumably a one second
	   timeout for the control - confirm against its definition */
	ret = ctdb_ctrl_getpid(ctdb, timeval_current_ofs(1, 0), vnn, &pid);
	if (ret != 0) {
		printf("Unable to get daemon pid from node %u\n", vnn);
		return ret;
	}

	/* pid is a uint32_t, so print it as unsigned, like vnn above */
	printf("Pid:%u\n", pid);

	return 0;
}
|
||||
|
||||
/*
|
||||
display recovery mode of a remote node
|
||||
*/
|
||||
@ -541,7 +572,7 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char **
|
||||
}
|
||||
|
||||
vnn = strtoul(argv[0], NULL, 0);
|
||||
recmode = strtoul(argv[0], NULL, 0);
|
||||
recmode = strtoul(argv[1], NULL, 0);
|
||||
|
||||
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), vnn, recmode);
|
||||
if (ret != 0) {
|
||||
@ -552,6 +583,56 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char **
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  display recovery master of a remote node

  argv[0] is the vnn of the node to query (parsed with strtoul, base
  auto-detected). Prints "Recovery master:<vnn>" on success.

  Returns 0 on success, or the non-zero result of
  ctdb_ctrl_getrecmaster() after printing an error message. With too
  few arguments usage() is called, which prints the help text and exits.
 */
static int control_getrecmaster(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t vnn, recmaster;
	int ret;

	if (argc < 1) {
		usage();
	}

	vnn = strtoul(argv[0], NULL, 0);

	/* NOTE(review): timeval_current_ofs(1, 0) is presumably a one second
	   timeout for the control - confirm against its definition */
	ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, &recmaster);
	if (ret != 0) {
		printf("Unable to get recmaster from node %u\n", vnn);
		return ret;
	}

	/* recmaster is a uint32_t, so print it as unsigned, like vnn above */
	printf("Recovery master:%u\n", recmaster);

	return 0;
}
|
||||
|
||||
/*
  set recovery master of a remote node

  argv[0] is the vnn of the node to update and argv[1] is the vnn that
  node should record as recovery master; both are parsed with strtoul
  (base auto-detected).

  Returns 0 on success, or the non-zero result of
  ctdb_ctrl_setrecmaster() after printing an error message. With fewer
  than two arguments usage() is called, which prints the help text and
  exits.
 */
static int control_setrecmaster(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t target_vnn;
	uint32_t new_master;
	int status;

	if (argc < 2) {
		usage();
	}

	target_vnn = strtoul(argv[0], NULL, 0);
	new_master = strtoul(argv[1], NULL, 0);

	status = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), target_vnn, new_master);
	if (status == 0) {
		return 0;
	}

	/* the control failed: report it and hand the error code back */
	printf("Unable to set recmaster on node %u\n", target_vnn);
	return status;
}
|
||||
|
||||
/*
|
||||
display remote list of keys/data for a db
|
||||
*/
|
||||
@ -559,6 +640,7 @@ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
const char *db_name;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
uint32_t vnn;
|
||||
int ret;
|
||||
|
||||
if (argc < 1) {
|
||||
@ -572,12 +654,35 @@ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = ctdb_dump_db(ctdb_db, stdout);
|
||||
if (ret == -1) {
|
||||
printf("Unable to dump database\n");
|
||||
return -1;
|
||||
}
|
||||
if (argc==1) {
|
||||
/* traverse and dump the cluster tdb */
|
||||
ret = ctdb_dump_db(ctdb_db, stdout);
|
||||
if (ret == -1) {
|
||||
printf("Unable to dump database\n");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
struct ctdb_key_list keys;
|
||||
int i;
|
||||
|
||||
/* dump only the local tdb of a specific node */
|
||||
vnn = strtoul(argv[1], NULL, 0);
|
||||
ret = ctdb_ctrl_pulldb(ctdb, vnn, ctdb_db->db_id, CTDB_LMASTER_ANY, ctdb, &keys);
|
||||
if (ret == -1) {
|
||||
printf("Unable to pull remote database\n");
|
||||
return -1;
|
||||
}
|
||||
for(i=0;i<keys.num;i++){
|
||||
char *keystr, *datastr;
|
||||
|
||||
keystr = hex_encode(ctdb, keys.keys[i].dptr, keys.keys[i].dsize);
|
||||
datastr = hex_encode(ctdb, keys.data[i].dptr, keys.data[i].dsize);
|
||||
|
||||
printf("rsn:%llu lmaster:%d dmaster:%d key:%s data:%s\n", keys.headers[i].rsn, keys.lmasters[i], keys.headers[i].dmaster, keystr, datastr);
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
|
||||
talloc_free(ctdb_db);
|
||||
|
||||
printf("Dumped %d records\n", ret);
|
||||
@ -636,9 +741,11 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
|
||||
printf("Number of databases:%d\n", dbmap->num);
|
||||
for(i=0;i<dbmap->num;i++){
|
||||
const char *path;
|
||||
const char *name;
|
||||
|
||||
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
|
||||
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
|
||||
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &name);
|
||||
printf("dbid:0x%08x name:%s path:%s\n", dbmap->dbids[i], name, path);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -993,6 +1100,8 @@ int main(int argc, const char *argv[])
|
||||
{ "cleardb", control_cleardb },
|
||||
{ "getrecmode", control_getrecmode },
|
||||
{ "setrecmode", control_setrecmode },
|
||||
{ "getrecmaster", control_getrecmaster },
|
||||
{ "setrecmaster", control_setrecmaster },
|
||||
{ "ping", control_ping },
|
||||
{ "debug", control_debug },
|
||||
{ "debuglevel", control_debuglevel },
|
||||
@ -1000,6 +1109,7 @@ int main(int argc, const char *argv[])
|
||||
{ "writerecord", control_writerecord },
|
||||
{ "attach", control_attach },
|
||||
{ "dumpmemory", control_dumpmemory },
|
||||
{ "getpid", control_getpid },
|
||||
};
|
||||
|
||||
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
|
||||
|
Loading…
x
Reference in New Issue
Block a user