1
0
mirror of https://github.com/samba-team/samba.git synced 2025-03-27 22:50:26 +03:00

merged from ronnie

(This used to be ctdb commit 49aad9fb09ca2c787e6f82ba03cb229cc51844f0)
This commit is contained in:
Andrew Tridgell 2007-05-07 07:56:38 +10:00
commit d98d8d4de3
8 changed files with 756 additions and 204 deletions

View File

@ -541,11 +541,12 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
struct ctdb_context *ctdb;
ctdb = talloc_zero(ev, struct ctdb_context);
ctdb->ev = ev;
ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
ctdb->upcalls = &ctdb_upcalls;
ctdb->idr = idr_init(ctdb);
ctdb->max_lacount = CTDB_DEFAULT_MAX_LACOUNT;
ctdb->ev = ev;
ctdb->recovery_mode = CTDB_RECOVERY_NORMAL;
ctdb->recovery_master = 0;
ctdb->upcalls = &ctdb_upcalls;
ctdb->idr = idr_init(ctdb);
ctdb->max_lacount = CTDB_DEFAULT_MAX_LACOUNT;
ctdb->seqnum_frequency = CTDB_DEFAULT_SEQNUM_FREQUENCY;
return ctdb;

View File

@ -722,7 +722,7 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
/* semi-async operation */
timed_out = 0;
if (timeout) {
event_add_timed(ctdb->ev, mem_ctx, *timeout, timeout_func, &timed_out);
event_add_timed(ctdb->ev, state, *timeout, timeout_func, &timed_out);
}
while ((state->state == CTDB_CALL_WAIT)
&& (timed_out == 0) ){
@ -732,7 +732,7 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
talloc_free(state);
return -1;
}
if (outdata) {
*outdata = state->outdata;
outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
@ -836,12 +836,12 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMODE, 0, data,
ctdb, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
if (ret != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
return -1;
}
*recmode = ((uint32_t *)outdata.dptr)[0];
*recmode = res;
return 0;
}
@ -870,6 +870,54 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
return 0;
}
/*
get the recovery master of a remote node
*/
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
{
int ret;
TDB_DATA data, outdata;
int32_t res;
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMASTER, 0, data,
ctdb, &outdata, &res, &timeout);
if (ret != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmaster failed\n"));
return -1;
}
*recmaster = res;
return 0;
}
/*
set the recovery master of a remote node
*/
int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
{
int ret;
TDB_DATA data, outdata;
int32_t res;
ZERO_STRUCT(data);
data.dsize = sizeof(uint32_t);
data.dptr = (unsigned char *)&recmaster;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SET_RECMASTER, 0, data,
ctdb, &outdata, &res, &timeout);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
return -1;
}
return 0;
}
/*
get a list of databases off a remote node
*/
@ -1569,3 +1617,26 @@ int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
return ctdb_traverse(ctdb_db, dumpdb_fn, f);
}
/*
get the pid of a ctdb daemon
*/
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
{
int ret;
TDB_DATA data, outdata;
int32_t res;
ZERO_STRUCT(data);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_PID, 0, data,
ctdb, &outdata, &res, &timeout);
if (ret != 0) {
DEBUG(0,(__location__ " ctdb_control for getpid failed\n"));
return -1;
}
*pid = res;
return 0;
}

View File

@ -341,13 +341,23 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
}
case CTDB_CONTROL_GET_RECMODE: {
outdata->dsize = sizeof(uint32_t);
outdata->dptr = (unsigned char *)talloc_array(outdata, uint32_t, 1);
*((uint32_t *)(&outdata->dptr[0])) = ctdb->recovery_mode;
return ctdb->recovery_mode;
}
case CTDB_CONTROL_SET_RECMASTER: {
	/* the payload is read as one uint32_t below, so insist on exactly
	   that size first (NOTE(review): relies on CHECK_CONTROL_DATA_SIZE
	   rejecting the control on a mismatch, as it is used for the other
	   controls - confirm against the macro definition) */
	CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
	ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
	return 0;
}
case CTDB_CONTROL_GET_RECMASTER: {
return ctdb->recovery_master;
}
case CTDB_CONTROL_GET_PID: {
return getpid();
}
case CTDB_CONTROL_CONFIG: {
CHECK_CONTROL_DATA_SIZE(0);
ctdb->status.controls.get_config++;

View File

@ -45,38 +45,9 @@ static void timeout_func(struct event_context *ev, struct timed_event *te,
timed_out = 1;
}
static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
{
int i, j, db, ret;
uint32_t generation;
struct ctdb_dbid_map *dbmap;
struct ctdb_dbid_map *remote_dbmap;
printf("we need to do recovery !!!\n");
/* pick a new generation number */
generation = random();
/* change the vnnmap on this node to use the new generation
number but not on any other nodes.
this guarantees that if we abort the recovery prematurely
for some reason (a node stops responding?)
that we can just return immediately and we will reenter
recovery shortly again.
I.e. we deliberately leave the cluster with an inconsistent
generation id to allow us to abort recovery at any stage and
just restart it from scratch.
*/
vnnmap->generation = generation;
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap);
if (ret != 0) {
printf("Unable to set vnnmap for node %u\n", vnn);
return -1;
}
int j, ret;
/* set recovery mode to active on all nodes */
for (j=0; j<nodemap->num; j++) {
@ -85,74 +56,41 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
continue;
}
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_ACTIVE);
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, rec_mode);
if (ret != 0) {
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Unable to set recmode on node %u\n", nodemap->nodes[j].vnn));
return -1;
}
}
return 0;
}
/* get a list of all databases */
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
return -1;
}
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
{
int j, ret;
/* verify that we have all database any other node has */
/* set recovery master to vnn on all nodes */
for (j=0; j<nodemap->num; j++) {
/* we dont need to ourself ourselves */
if (nodemap->nodes[j].vnn == vnn) {
continue;
}
/* dont check nodes that are unavailable */
/* dont change it for nodes that are unavailable */
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
continue;
}
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
ret = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, vnn);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
DEBUG(0, (__location__ "Unable to set recmaster on node %u\n", nodemap->nodes[j].vnn));
return -1;
}
/* step through all databases on the remote node */
for (db=0; db<remote_dbmap->num;db++) {
const char *name;
for (i=0;i<dbmap->num;i++) {
if (remote_dbmap->dbids[db] == dbmap->dbids[i]) {
break;
}
}
/* we already have this db locally */
if (i!=dbmap->num) {
continue;
}
/* ok so we need to create this database and
rebuild dbmap
*/
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name);
if (ret != 0) {
printf("Unable to get dbname from node %u\n", nodemap->nodes[j].vnn);
return -1;
}
ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name);
if (ret != 0) {
printf("Unable to create local db:%s\n", name);
return -1;
}
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
if (ret != 0) {
printf("Unable to reread dbmap on node %u\n", vnn);
return -1;
}
}
}
return 0;
}
static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, db, ret;
struct ctdb_dbid_map *remote_dbmap;
/* verify that all other nodes have all our databases */
for (j=0; j<nodemap->num; j++) {
@ -167,7 +105,7 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
if (ret != 0) {
printf("Unable to get dbids from node %u\n", vnn);
DEBUG(0, (__location__ "Unable to get dbids from node %u\n", vnn));
return -1;
}
@ -175,31 +113,99 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
for (db=0; db<dbmap->num;db++) {
const char *name;
for (i=0;i<remote_dbmap->num;i++) {
if (dbmap->dbids[db] == remote_dbmap->dbids[i]) {
break;
}
}
/* the remote node already have this database */
if (i!=dbmap->num) {
if (i!=remote_dbmap->num) {
continue;
}
/* ok so we need to create this database */
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), vnn, dbmap->dbids[db], mem_ctx, &name);
if (ret != 0) {
printf("Unable to get dbname from node %u\n", vnn);
DEBUG(0, (__location__ "Unable to get dbname from node %u\n", vnn));
return -1;
}
ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, name);
if (ret != 0) {
printf("Unable to create remote db:%s\n", name);
DEBUG(0, (__location__ "Unable to create remote db:%s\n", name));
return -1;
}
}
}
return 0;
}
/* pull all records from all other nodes across to this node
/*
  make sure the local node has every database that any connected
  remote node has

  For each connected node other than vnn, fetch its dbid map and, for
  every dbid that is not in *dbmap, look up the database name on that
  remote node and create the database on the local node. *dbmap is
  re-read from the local node after each creation, so on return it
  reflects the databases that were added.

  returns 0 on success, -1 as soon as any control fails
 */
static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map **dbmap, TALLOC_CTX *mem_ctx)
{
	int i, j, db, ret;
	struct ctdb_dbid_map *remote_dbmap;

	/* verify that we have all database any other node has */
	for (j=0; j<nodemap->num; j++) {
		/* we dont need to check ourselves */
		if (nodemap->nodes[j].vnn == vnn) {
			continue;
		}
		/* dont check nodes that are unavailable */
		if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
			continue;
		}

		ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_dbmap);
		if (ret != 0) {
			/* report the node we actually asked, not ourselves */
			DEBUG(0, (__location__ "Unable to get dbids from node %u\n", nodemap->nodes[j].vnn));
			return -1;
		}

		/* step through all databases on the remote node */
		for (db=0; db<remote_dbmap->num;db++) {
			const char *name;

			for (i=0;i<(*dbmap)->num;i++) {
				if (remote_dbmap->dbids[db] == (*dbmap)->dbids[i]) {
					break;
				}
			}
			/* we already have this db locally */
			if (i!=(*dbmap)->num) {
				continue;
			}

			/* ok so we need to create this database and
			   rebuild dbmap
			 */
			/* the result must land in ret, otherwise the check
			   below tests the stale value of the previous call
			   and name may be used uninitialised */
			ret = ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, remote_dbmap->dbids[db], mem_ctx, &name);
			if (ret != 0) {
				DEBUG(0, (__location__ "Unable to get dbname from node %u\n", nodemap->nodes[j].vnn));
				return -1;
			}
			ret = ctdb_ctrl_createdb(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, name);
			if (ret != 0) {
				DEBUG(0, (__location__ "Unable to create local db:%s\n", name));
				return -1;
			}
			ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, dbmap);
			if (ret != 0) {
				DEBUG(0, (__location__ "Unable to reread dbmap on node %u\n", vnn));
				return -1;
			}
		}
	}

	return 0;
}
static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, ret;
/* pull all records from all other nodes across onto this node
(this merges based on rsn)
*/
for (i=0;i<dbmap->num;i++) {
@ -214,12 +220,20 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
}
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(2, 0), nodemap->nodes[j].vnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn);
DEBUG(0, (__location__ "Unable to copy db from node %u to node %u\n", nodemap->nodes[j].vnn, vnn));
return -1;
}
}
}
return 0;
}
static int update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, ret;
/* update dmaster to point to this node for all databases/nodes */
for (i=0;i<dbmap->num;i++) {
@ -230,12 +244,19 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
}
ret = ctdb_ctrl_setdmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], vnn);
if (ret != 0) {
printf("Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]);
DEBUG(0, (__location__ "Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i]));
return -1;
}
}
}
return 0;
}
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
{
int i, j, ret;
/* push all records out to the nodes again */
for (i=0;i<dbmap->num;i++) {
@ -250,27 +271,19 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
}
ret = ctdb_ctrl_copydb(ctdb, timeval_current_ofs(1, 0), vnn, nodemap->nodes[j].vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx);
if (ret != 0) {
printf("Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Unable to copy db from node %u to node %u\n", vnn, nodemap->nodes[j].vnn));
return -1;
}
}
}
return 0;
}
/* build a new vnn map */
vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active);
if (vnnmap == NULL) {
DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n"));
exit(1);
}
vnnmap->generation = generation;
vnnmap->size = num_active;
for (i=j=0;i<nodemap->num;i++) {
if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
vnnmap->map[j++]=nodemap->nodes[i].vnn;
}
}
static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn, struct ctdb_vnn_map *vnnmap, TALLOC_CTX *mem_ctx)
{
int j, ret;
/* push the new vnn map out to all the nodes */
for (j=0; j<nodemap->num; j++) {
@ -281,33 +294,269 @@ static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, vnnmap);
if (ret != 0) {
printf("Unable to set vnnmap for node %u\n", vnn);
DEBUG(0, (__location__ "Unable to set vnnmap for node %u\n", vnn));
return -1;
}
}
/* disable recovery mode */
for (j=0; j<nodemap->num; j++) {
/* dont push to nodes that are unavailable */
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
continue;
}
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, CTDB_RECOVERY_NORMAL);
if (ret != 0) {
printf("Unable to set recmode on node %u\n", nodemap->nodes[j].vnn);
return -1;
}
}
return 0;
}
void recoverd(struct ctdb_context *ctdb, struct event_context *ev)
/*
  run a full recovery of the cluster from the local node (vnn)

  Steps, in order:
    - stamp a new random generation into the local vnnmap only
    - put all connected nodes into recovery mode
    - make sure every connected node has every database
    - pull all remote records onto this node, repoint dmaster to this
      node everywhere, and push the merged records back out
    - build a vnnmap listing the connected nodes and push it to all nodes
    - set this node as recovery master on all nodes
    - return all nodes to normal mode

  vnnmap is the caller's current map; it is only used for the initial
  generation bump and is then replaced by a freshly built map allocated
  on mem_ctx. ev is not used by this function.

  returns 0 on success, -1 as soon as any step fails
 */
static int do_recovery(struct ctdb_context *ctdb, struct event_context *ev,
	TALLOC_CTX *mem_ctx, uint32_t vnn, uint32_t num_active,
	struct ctdb_node_map *nodemap, struct ctdb_vnn_map *vnnmap)
{
	/* vnn and num_active are parameters; they must not be redeclared
	   as locals here */
	int i, j, ret;
	uint32_t generation;
	struct ctdb_dbid_map *dbmap;

	DEBUG(0, (__location__ "Recovery initiated\n"));

	/* pick a new generation number */
	generation = random();

	/* change the vnnmap on this node to use the new generation
	   number but not on any other nodes.
	   this guarantees that if we abort the recovery prematurely
	   for some reason (a node stops responding?)
	   that we can just return immediately and we will reenter
	   recovery shortly again.
	   I.e. we deliberately leave the cluster with an inconsistent
	   generation id to allow us to abort recovery at any stage and
	   just restart it from scratch.
	 */
	vnnmap->generation = generation;
	ret = ctdb_ctrl_setvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, vnnmap);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to set vnnmap for node %u\n", vnn));
		return -1;
	}

	/* set recovery mode to active on all nodes */
	ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
	if (ret!=0) {
		DEBUG(0, (__location__ "Unable to set recovery mode to active on cluster\n"));
		return -1;
	}

	/* get a list of all databases */
	ret = ctdb_ctrl_getdbmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &dbmap);
	if (ret != 0) {
		/* vnn is unsigned, so print it with %u */
		DEBUG(0, (__location__ "Unable to get dbids from node :%u\n", vnn));
		return -1;
	}

	/* verify that all other nodes have all our databases */
	ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to create missing remote databases\n"));
		return -1;
	}

	/* verify that we have all the databases any other node has */
	ret = create_missing_local_databases(ctdb, nodemap, vnn, &dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to create missing local databases\n"));
		return -1;
	}

	/* verify that all other nodes have all our databases
	   (again: dbmap may have grown in the previous step) */
	ret = create_missing_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to create missing remote databases\n"));
		return -1;
	}

	/* pull all remote databases onto the local node */
	ret = pull_all_remote_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to pull remote databases\n"));
		return -1;
	}

	/* repoint all local and remote database records to the local
	   node as being dmaster
	 */
	ret = update_dmaster_on_all_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to update dmaster on all databases\n"));
		return -1;
	}

	/* push all local databases to the remote nodes */
	ret = push_all_local_databases(ctdb, nodemap, vnn, dbmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to push local databases\n"));
		return -1;
	}

	/* build a new vnn map with all the currently active nodes */
	vnnmap = talloc_zero_size(mem_ctx, offsetof(struct ctdb_vnn_map, map) + 4*num_active);
	if (vnnmap == NULL) {
		DEBUG(0,(__location__ " Unable to allocate vnn_map structure\n"));
		return -1;
	}
	vnnmap->generation = generation;
	vnnmap->size = num_active;
	for (i=j=0;i<nodemap->num;i++) {
		if (nodemap->nodes[i].flags&NODE_FLAGS_CONNECTED) {
			vnnmap->map[j++]=nodemap->nodes[i].vnn;
		}
	}

	/* update to the new vnnmap on all nodes */
	ret = update_vnnmap_on_all_nodes(ctdb, nodemap, vnn, vnnmap, mem_ctx);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to update vnnmap on all nodes\n"));
		return -1;
	}

	/* update recmaster to point to us for all nodes */
	ret = set_recovery_master(ctdb, nodemap, vnn);
	if (ret!=0) {
		DEBUG(0, (__location__ "Unable to set recovery master\n"));
		return -1;
	}

	/* disable recovery mode */
	ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
	if (ret!=0) {
		DEBUG(0, (__location__ "Unable to set recovery mode to normal on cluster\n"));
		return -1;
	}

	DEBUG(0, (__location__ "Recovery complete\n"));
	return 0;
}
/*
  payload of a recovery master election message, as built by
  send_election_request() and read back by election_handler()
 */
struct election_message {
/* vnn of the node that sent the election request */
uint32_t vnn;
};
/*
  call an election with vnn as the candidate

  First records vnn as recovery master on node vnn, then broadcasts an
  election_message carrying vnn to all nodes on the recovery srvid.
  mem_ctx is not used here.

  returns 0 once the broadcast has been issued, -1 if the recmaster
  could not be set
 */
static int send_election_request(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn)
{
	struct election_message msg;
	TDB_DATA payload;
	/* the recovery service type sits in the upper 32 bits of the srvid */
	uint64_t election_srvid = ((uint64_t)CTDB_SRVTYPE_RECOVERY) << 32;

	/* first we assume we will win the election and set
	   recoverymaster to be ourself on the current node
	 */
	if (ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, vnn) != 0) {
		DEBUG(0, (__location__ "failed to send recmaster election request"));
		return -1;
	}

	/* wrap our vnn up as the message body */
	msg.vnn       = vnn;
	payload.dptr  = (unsigned char *)&msg;
	payload.dsize = sizeof(struct election_message);

	/* send an election message to all active nodes */
	ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, election_srvid, payload);

	return 0;
}
/*
  handler for recovery master elections

  Called with the election_message another node broadcast through
  send_election_request(). Our own broadcasts are ignored. If the
  sender has a higher vnn than we do, we call a new election with
  ourselves as candidate; otherwise we accept the sender and record it
  as recovery master on the local node.

  srvid and private_data are not used.
 */
static void election_handler(struct ctdb_context *ctdb, uint64_t srvid,
			     TDB_DATA data, void *private_data)
{
	int ret;
	uint32_t our_vnn;
	struct election_message *em;
	TALLOC_CTX *mem_ctx;

	/* the payload was sent by another process; make sure it really
	   holds an election message before em->vnn is read */
	if (data.dptr == NULL || data.dsize < sizeof(struct election_message)) {
		DEBUG(0, (__location__ " ignoring short election message\n"));
		return;
	}
	em = (struct election_message *)data.dptr;

	our_vnn = ctdb_get_vnn(ctdb);

	/* this is our own election request coming back to us */
	if (em->vnn == our_vnn) {
		return;
	}

	/* someone called an election. check their election data
	   and if we disagree and we would rather be the elected node,
	   send a new election message to all other nodes
	 */
	/* for now we just check the vnn number and allow the lowest
	   vnn number to become recovery master
	 */
	if (em->vnn > our_vnn) {
		/* a scratch context is only needed on this path */
		mem_ctx = talloc_new(ctdb);
		ret = send_election_request(ctdb, mem_ctx, our_vnn);
		if (ret!=0) {
			DEBUG(0, (__location__ "failed to initiate recmaster election"));
		}
		talloc_free(mem_ctx);
		return;
	}

	/* ok, let that guy become recmaster then */
	ret = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), our_vnn, em->vnn);
	if (ret != 0) {
		DEBUG(0, (__location__ "failed to send recmaster election request"));
	}
}
/*
  call an election with vnn as candidate and then run the event loop
  for one second so that replies from other nodes can be processed

  The one second timer is allocated on mem_ctx and reports through the
  file-level timed_out flag. nodemap is not used here.
 */
static void force_election(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t vnn, struct ctdb_node_map *nodemap)
{
	/* nothing to wait for if the request never went out */
	if (send_election_request(ctdb, mem_ctx, vnn) != 0) {
		DEBUG(0, (__location__ "failed to initiate recmaster election"));
		return;
	}

	/* wait for one second to collect all responses */
	timed_out = 0;
	event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, ctdb);
	while (timed_out == 0) {
		event_loop_once(ctdb->ev);
	}
}
void monitor_cluster(struct ctdb_context *ctdb, struct event_context *ev)
{
uint32_t vnn, num_active, recmode, recmaster;
TALLOC_CTX *mem_ctx=NULL;
struct ctdb_node_map *nodemap=NULL;
struct ctdb_node_map *remote_nodemap=NULL;
@ -316,7 +565,6 @@ void recoverd(struct ctdb_context *ctdb, struct event_context *ev)
int i, j, ret;
again:
printf("check if we need to do recovery\n");
if (mem_ctx) {
talloc_free(mem_ctx);
mem_ctx = NULL;
@ -327,7 +575,6 @@ again:
exit(-1);
}
/* we only check for recovery once every second */
timed_out = 0;
event_add_timed(ctdb->ev, mem_ctx, timeval_current_ofs(1, 0), timeout_func, ctdb);
@ -339,13 +586,23 @@ again:
/* get our vnn number */
vnn = ctdb_get_vnn(ctdb);
/* get the vnnmap */
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap);
if (ret != 0) {
DEBUG(0, (__location__ "Unable to get vnnmap from node %u\n", vnn));
goto again;
}
/* get number of nodes */
ret = ctdb_ctrl_getnodemap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &nodemap);
if (ret != 0) {
printf("Unable to get nodemap from node %u\n", vnn);
DEBUG(0, (__location__ "Unable to get nodemap from node %u\n", vnn));
goto again;
}
/* count how many active nodes there are */
num_active = 0;
for (i=0; i<nodemap->num; i++) {
@ -355,6 +612,79 @@ again:
}
/* check which node is the recovery master */
ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, &recmaster);
if (ret != 0) {
	DEBUG(0, (__location__ "Unable to get recmaster from node %u\n", vnn));
	goto again;
}

/* verify that the recmaster node is still active */
for (j=0; j<nodemap->num; j++) {
	if (nodemap->nodes[j].vnn==recmaster) {
		break;
	}
}
/* the recorded recmaster may not be in our nodemap at all; nodes[j]
   would then be one past the end of the array, so handle it like a
   lost recmaster without touching nodes[j] */
if (j == nodemap->num) {
	DEBUG(0, ("Recmaster node %u not in list. Force reelection\n", recmaster));
	force_election(ctdb, mem_ctx, vnn, nodemap);
	goto again;
}
if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
	DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn));
	force_election(ctdb, mem_ctx, vnn, nodemap);
	goto again;
}

/* if we are not the recmaster then we do not need to check
   if recovery is needed
 */
if (vnn!=recmaster) {
	goto again;
}

/* verify that all active nodes agree that we are the recmaster */
for (j=0; j<nodemap->num; j++) {
	if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
		continue;
	}
	if (nodemap->nodes[j].vnn == vnn) {
		continue;
	}

	/* ask the remote node itself; asking the local node would only
	   repeat the answer we already have */
	ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, &recmaster);
	if (ret != 0) {
		DEBUG(0, (__location__ "Unable to get recmaster from node %u\n", nodemap->nodes[j].vnn));
		goto again;
	}

	if (recmaster!=vnn) {
		DEBUG(0, ("Node %d does not agree we are the recmaster. Force reelection\n", nodemap->nodes[j].vnn));
		force_election(ctdb, mem_ctx, vnn, nodemap);
		goto again;
	}
}

/* verify that all active nodes are in normal mode
   and not in recovery mode
 */
for (j=0; j<nodemap->num; j++) {
	if (!(nodemap->nodes[j].flags&NODE_FLAGS_CONNECTED)) {
		continue;
	}

	/* query node j, not the local node, so that a remote node stuck
	   in recovery mode is actually noticed */
	ret = ctdb_ctrl_getrecmode(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, &recmode);
	if (ret != 0) {
		DEBUG(0, ("Unable to get recmode from node %u\n", nodemap->nodes[j].vnn));
		goto again;
	}
	if (recmode!=CTDB_RECOVERY_NORMAL) {
		DEBUG(0, (__location__ "Node:%d was in recovery mode. Restart recovery process\n", nodemap->nodes[j].vnn));
		do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
		goto again;
	}
}
/* get the nodemap for all active remote nodes and verify
they are the same as for this node
*/
@ -368,7 +698,7 @@ again:
ret = ctdb_ctrl_getnodemap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_nodemap);
if (ret != 0) {
printf("Unable to get nodemap from remote node %u\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Unable to get nodemap from remote node %u\n", nodemap->nodes[j].vnn));
goto again;
}
@ -376,7 +706,7 @@ again:
then this is a good reason to try recovery
*/
if (remote_nodemap->num != nodemap->num) {
printf("Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num);
DEBUG(0, (__location__ "Remote node:%d has different node count. %d vs %d of the local node\n", nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -387,7 +717,7 @@ again:
for (i=0;i<nodemap->num;i++) {
if ((remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn)
|| (remote_nodemap->nodes[i].flags != nodemap->nodes[i].flags)) {
printf("Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Remote node:%d has different nodemap.\n", nodemap->nodes[j].vnn));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -395,18 +725,12 @@ again:
}
/* get the vnnmap */
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), vnn, mem_ctx, &vnnmap);
if (ret != 0) {
printf("Unable to get vnnmap from node %u\n", vnn);
goto again;
}
/* there better be the same number of lmasters in the vnn map
as there are active nodes or well have to do a recovery
*/
if (vnnmap->size != num_active) {
printf("The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active);
DEBUG(0, (__location__ "The vnnmap count is different from the number of active nodes. %d vs %d\n", vnnmap->size, num_active));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -428,7 +752,7 @@ again:
}
}
if (i==vnnmap->size) {
printf("Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Node %d is active in the nodemap but did not exist in the vnnmap\n", nodemap->nodes[j].vnn));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -448,20 +772,20 @@ again:
ret = ctdb_ctrl_getvnnmap(ctdb, timeval_current_ofs(1, 0), nodemap->nodes[j].vnn, mem_ctx, &remote_vnnmap);
if (ret != 0) {
printf("Unable to get vnnmap from remote node %u\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Unable to get vnnmap from remote node %u\n", nodemap->nodes[j].vnn));
goto again;
}
/* verify the vnnmap generation is the same */
if (vnnmap->generation != remote_vnnmap->generation) {
printf("Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation);
DEBUG(0, (__location__ "Remote node %d has different generation of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
/* verify the vnnmap size is the same */
if (vnnmap->size != remote_vnnmap->size) {
printf("Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size);
DEBUG(0, (__location__ "Remote node %d has different size of vnnmap. %d vs %d (ours)\n", nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
@ -469,14 +793,13 @@ again:
/* verify the vnnmap is the same */
for (i=0;i<vnnmap->size;i++) {
if (remote_vnnmap->map[i] != vnnmap->map[i]) {
printf("Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn);
DEBUG(0, (__location__ "Remote node %d has different vnnmap.\n", nodemap->nodes[j].vnn));
do_recovery(ctdb, ev, mem_ctx, vnn, num_active, nodemap, vnnmap);
goto again;
}
}
}
printf("no we did not need to do recovery\n");
goto again;
}
@ -498,6 +821,7 @@ int main(int argc, const char *argv[])
int ret;
poptContext pc;
struct event_context *ev;
uint64_t srvid;
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
@ -528,12 +852,18 @@ int main(int argc, const char *argv[])
/* initialise ctdb */
ctdb = ctdb_cmdline_client(ev);
if (ctdb == NULL) {
printf("Failed to init ctdb\n");
DEBUG(0, (__location__ "Failed to init ctdb\n"));
exit(1);
}
recoverd(ctdb, ev);
/* register a message port for recovery elections */
srvid = CTDB_SRVTYPE_RECOVERY;
srvid <<= 32;
ctdb_set_message_handler(ctdb, srvid, election_handler, NULL);
monitor_cluster(ctdb, ev);
return ret;
}

View File

@ -57,6 +57,10 @@ struct ctdb_call_info {
a message handler ID meaning "give me all messages"
*/
#define CTDB_SRVID_ALL (~(uint64_t)0)
/*
srvid type : RECOVERY
*/
#define CTDB_SRVTYPE_RECOVERY 0x64766372
struct event_context;
@ -289,6 +293,15 @@ int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
*/
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode);
/*
get the recovery master of a remote node
*/
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
/*
set the recovery master of a remote node
*/
int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster);
uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
struct timeval timeout,
TALLOC_CTX *mem_ctx,
@ -303,4 +316,9 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f);
/*
get the pid of a ctdb daemon
*/
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid);
#endif

View File

@ -234,6 +234,7 @@ struct ctdb_context {
struct ctdb_vnn_map *vnn_map;
uint32_t num_clients;
uint32_t seqnum_frequency;
uint32_t recovery_master;
};
struct ctdb_db_context {
@ -312,6 +313,9 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS,
CTDB_CONTROL_UPDATE_SEQNUM,
CTDB_CONTROL_SET_SEQNUM_FREQUENCY,
CTDB_CONTROL_DUMP_MEMORY,
CTDB_CONTROL_GET_PID,
CTDB_CONTROL_GET_RECMASTER,
CTDB_CONTROL_SET_RECMASTER,
};

View File

@ -1,23 +1,27 @@
#!/bin/sh
killall -q ctdbd
killall -q recoverd
echo "Starting 4 ctdb daemons"
bin/ctdbd --nlist direct/4nodes.txt
bin/ctdbd --nlist direct/4nodes.txt
bin/ctdbd --nlist direct/4nodes.txt
bin/ctdbd --nlist direct/4nodes.txt
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.2:9001 --socket=/tmp/ctdb.socket.127.0.0.2
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.3:9001 --socket=/tmp/ctdb.socket.127.0.0.3
bin/ctdbd --nlist direct/4nodes.txt --listen=127.0.0.4:9001 --socket=/tmp/ctdb.socket.127.0.0.4
echo "Starting one recovery daemon on node 0"
bin/recoverd --socket=/tmp/ctdb.socket >/dev/null 2>/dev/null &
echo
echo "Attaching to some databases"
bin/ctdb_control --socket=/tmp/ctdb.socket attach test1.tdb || exit 1
bin/ctdb_control --socket=/tmp/ctdb.socket attach test2.tdb || exit 1
bin/ctdb_control --socket=/tmp/ctdb.socket attach test3.tdb || exit 1
bin/ctdb_control --socket=/tmp/ctdb.socket attach test4.tdb || exit 1
bin/ctdb_control attach test1.tdb || exit 1
bin/ctdb_control attach test2.tdb || exit 1
bin/ctdb_control attach test3.tdb || exit 1
bin/ctdb_control attach test4.tdb || exit 1
echo "Clearing all databases to make sure they are all empty"
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
seq 0 3 | while read NODE; do
bin/ctdb_control --socket=/tmp/ctdb.socket cleardb $NODE $DB
bin/ctdb_control cleardb $NODE $DB
done
done
@ -26,60 +30,60 @@ echo
echo
echo "Printing all databases on all nodes. they should all be empty"
echo "============================================================="
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^.*name://" -e "s/ .*$//" | while read DBNAME; do
seq 0 3 | while read NODE; do
echo "Content of DB:$DB NODE:$NODE :"
bin/ctdb_control --socket=/tmp/ctdb.socket catdb $NODE $DB
echo "Content of DBNAME:$DBNAME NODE:$NODE :"
bin/ctdb_control catdb $DBNAME $NODE
done
done
echo
echo
echo "Populating the databases"
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 0 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 0 0x220c2a7b 1
./bin/ctdb_control writerecord 0 0x220c2a7b testkey1 testdata1
./bin/ctdb_control setdmaster 0 0x220c2a7b 1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 1 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 1 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 1 0x220c2a7b 2
./bin/ctdb_control writerecord 1 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 1 0x220c2a7b testkey1 testdata1
./bin/ctdb_control setdmaster 1 0x220c2a7b 2
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 2 0x220c2a7b 3
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 2 0x220c2a7b testkey1 testdata1
./bin/ctdb_control setdmaster 2 0x220c2a7b 3
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control --socket=/tmp/ctdb.socket setdmaster 3 0x220c2a7b 3
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control writerecord 3 0x220c2a7b testkey1 testdata1
./bin/ctdb_control setdmaster 3 0x220c2a7b 3
echo
echo
echo "Printing all databases on all nodes. there should be a record there"
echo "============================================================="
bin/ctdb_control --socket=/tmp/ctdb.socket getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^.*name://" -e "s/ .*$//" | while read DBNAME; do
seq 0 3 | while read NODE; do
echo "Content of DB:$DB NODE:$NODE :"
bin/ctdb_control --socket=/tmp/ctdb.socket catdb $NODE $DB
echo "Content of DBNAME:$DBNAME NODE:$NODE :"
bin/ctdb_control catdb $DBNAME $NODE
done
done
echo
echo
echo "killing off node #0"
echo "killing off node #2"
echo "==================="
CTDBPID=`ps aux | grep ctdbd | grep -v grep | head -1 | sed -e "s/^[^ ]* *//" -e "s/ .*$//"`
CTDBPID=`./bin/ctdb_control getpid 2 | sed -e "s/Pid://"`
kill $CTDBPID
sleep 1
echo
echo
echo "Recovery the cluster"
echo "===================="
./bin/ctdb_control --socket=/tmp/ctdb.socket recover 2 0x220c2a7b
echo "wait 3 seconds to let the recovery daemon do its job"
echo "===================================================="
sleep 3
echo
echo
@ -87,15 +91,19 @@ echo "Printing all databases on all nodes."
echo "The databases should be the same now on all nodes"
echo "and the record will have been migrated to node 0"
echo "================================================="
echo "Node 0:"
bin/ctdb_control catdb test4.tdb 0
echo "Node 1:"
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 1 0x220c2a7b
echo "Node 2:"
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 2 0x220c2a7b
bin/ctdb_control catdb test4.tdb 1
echo "Node 3:"
bin/ctdb_control --socket=/tmp/ctdb.socket catdb 3 0x220c2a7b
bin/ctdb_control catdb test4.tdb 3
echo "nodemap:"
bin/ctdb_control --socket=/tmp/ctdb.socket getnodemap 3
bin/ctdb_control getnodemap 0
echo
echo
echo "Traverse the cluster and dump the database"
bin/ctdb_control catdb test4.tdb
#leave the ctdb daemons running so one can look at the box in more detail

View File

@ -46,15 +46,19 @@ static void usage(void)
" getdbmap <vnn> lists databases on a node\n"
" getnodemap <vnn> lists nodes known to a ctdb daemon\n"
" createdb <vnn> <dbname> create a database\n"
" catdb <dbname> lists all keys/data in a db\n"
" catdb <dbname> [vnn] lists all keys/data in a db\n"
" cpdb <fromvnn> <tovnn> <dbid> lists all keys in a remote tdb\n"
" setdmaster <vnn> <dbid> <dmaster> sets new dmaster for all records in the database\n"
" cleardb <vnn> <dbid> deletes all records in a db\n"
" getrecmode <vnn> get recovery mode\n"
" setrecmode <vnn> <mode> set recovery mode\n"
" getrecmaster <vnn> get recovery master\n"
" setrecmaster <vnn> <master_vnn> set recovery master\n"
" writerecord <vnn> <dbid> <key> <data>\n"
" recover <vnn> recover the cluster\n"
" attach <dbname> attach a database\n");
" attach <dbname> attach a database\n"
" getpid <vnn> get the pid of a ctdb daemon\n"
);
exit(1);
}
@ -345,9 +349,11 @@ static int control_recover(struct ctdb_context *ctdb, int argc, const char **arg
}
for (i=0;i<dbmap->num;i++) {
const char *path;
const char *name;
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &name);
printf("dbid:0x%08x name:%s path:%s\n", dbmap->dbids[i], name, path);
}
/* 5: pull all records from all other nodes across to this node
@ -502,6 +508,31 @@ static int control_getvnnmap(struct ctdb_context *ctdb, int argc, const char **a
return 0;
}
/*
  display pid of a ctdb daemon

  argv[0] is the vnn of the node to query; it is parsed with base 0, so
  decimal, octal and hex forms are accepted. With no argument, usage() is
  called, which prints the help text and exits.

  On success prints "Pid:<pid>" and returns 0. On failure prints an error
  and returns the non-zero result of ctdb_ctrl_getpid().
 */
static int control_getpid(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t vnn, pid;
	int ret;

	if (argc < 1) {
		usage();
	}

	vnn = strtoul(argv[0], NULL, 0);

	/* timeval_current_ofs(1, 0) is presumably a one second timeout - TODO confirm */
	ret = ctdb_ctrl_getpid(ctdb, timeval_current_ofs(1, 0), vnn, &pid);
	if (ret != 0) {
		printf("Unable to get daemon pid from node %u\n", vnn);
		return ret;
	}

	/* pid is a uint32_t, so print it as unsigned just like vnn above */
	printf("Pid:%u\n", pid);

	return 0;
}
/*
display recovery mode of a remote node
*/
@ -541,7 +572,7 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char **
}
vnn = strtoul(argv[0], NULL, 0);
recmode = strtoul(argv[0], NULL, 0);
recmode = strtoul(argv[1], NULL, 0);
ret = ctdb_ctrl_setrecmode(ctdb, timeval_current_ofs(1, 0), vnn, recmode);
if (ret != 0) {
@ -552,6 +583,56 @@ static int control_setrecmode(struct ctdb_context *ctdb, int argc, const char **
return 0;
}
/*
  display recovery master of a remote node

  argv[0] is the vnn of the node to query; it is parsed with base 0, so
  decimal, octal and hex forms are accepted. With no argument, usage() is
  called, which prints the help text and exits.

  On success prints "Recovery master:<vnn>" and returns 0. On failure
  prints an error and returns the non-zero result of
  ctdb_ctrl_getrecmaster().
 */
static int control_getrecmaster(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t vnn, recmaster;
	int ret;

	if (argc < 1) {
		usage();
	}

	vnn = strtoul(argv[0], NULL, 0);

	/* timeval_current_ofs(1, 0) is presumably a one second timeout - TODO confirm */
	ret = ctdb_ctrl_getrecmaster(ctdb, timeval_current_ofs(1, 0), vnn, &recmaster);
	if (ret != 0) {
		printf("Unable to get recmaster from node %u\n", vnn);
		return ret;
	}

	/* recmaster is a uint32_t, so print it as unsigned just like vnn above */
	printf("Recovery master:%u\n", recmaster);

	return 0;
}
/*
  tell a remote node which node is the recovery master

  argv[0] is the vnn of the node to update and argv[1] is the vnn of the
  new recovery master; both are parsed with base 0. With fewer than two
  arguments, usage() is called, which prints the help text and exits.

  Returns 0 on success, otherwise prints an error and returns the
  non-zero result of ctdb_ctrl_setrecmaster().
 */
static int control_setrecmaster(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t target_vnn;
	uint32_t new_master;
	int status;

	if (argc < 2) {
		usage();
	}

	target_vnn = strtoul(argv[0], NULL, 0);
	new_master = strtoul(argv[1], NULL, 0);

	status = ctdb_ctrl_setrecmaster(ctdb, timeval_current_ofs(1, 0), target_vnn, new_master);
	if (status == 0) {
		return 0;
	}

	printf("Unable to set recmaster on node %u\n", target_vnn);
	return status;
}
/*
display remote list of keys/data for a db
*/
@ -559,6 +640,7 @@ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv)
{
const char *db_name;
struct ctdb_db_context *ctdb_db;
uint32_t vnn;
int ret;
if (argc < 1) {
@ -572,12 +654,35 @@ static int control_catdb(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
ret = ctdb_dump_db(ctdb_db, stdout);
if (ret == -1) {
printf("Unable to dump database\n");
return -1;
}
if (argc==1) {
/* traverse and dump the cluster tdb */
ret = ctdb_dump_db(ctdb_db, stdout);
if (ret == -1) {
printf("Unable to dump database\n");
return -1;
}
} else {
struct ctdb_key_list keys;
int i;
/* dump only the local tdb of a specific node */
vnn = strtoul(argv[1], NULL, 0);
ret = ctdb_ctrl_pulldb(ctdb, vnn, ctdb_db->db_id, CTDB_LMASTER_ANY, ctdb, &keys);
if (ret == -1) {
printf("Unable to pull remote database\n");
return -1;
}
for(i=0;i<keys.num;i++){
char *keystr, *datastr;
keystr = hex_encode(ctdb, keys.keys[i].dptr, keys.keys[i].dsize);
datastr = hex_encode(ctdb, keys.data[i].dptr, keys.data[i].dsize);
printf("rsn:%llu lmaster:%d dmaster:%d key:%s data:%s\n", keys.headers[i].rsn, keys.lmasters[i], keys.headers[i].dmaster, keystr, datastr);
ret++;
}
}
talloc_free(ctdb_db);
printf("Dumped %d records\n", ret);
@ -636,9 +741,11 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
printf("Number of databases:%d\n", dbmap->num);
for(i=0;i<dbmap->num;i++){
const char *path;
const char *name;
ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &path);
printf("dbid:0x%08x path:%s\n", dbmap->dbids[i], path);
ctdb_ctrl_getdbname(ctdb, timeval_current_ofs(1, 0), CTDB_CURRENT_NODE, dbmap->dbids[i], ctdb, &name);
printf("dbid:0x%08x name:%s path:%s\n", dbmap->dbids[i], name, path);
}
return 0;
@ -993,6 +1100,8 @@ int main(int argc, const char *argv[])
{ "cleardb", control_cleardb },
{ "getrecmode", control_getrecmode },
{ "setrecmode", control_setrecmode },
{ "getrecmaster", control_getrecmaster },
{ "setrecmaster", control_setrecmaster },
{ "ping", control_ping },
{ "debug", control_debug },
{ "debuglevel", control_debuglevel },
@ -1000,6 +1109,7 @@ int main(int argc, const char *argv[])
{ "writerecord", control_writerecord },
{ "attach", control_attach },
{ "dumpmemory", control_dumpmemory },
{ "getpid", control_getpid },
};
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);