mirror of
https://github.com/samba-team/samba.git
synced 2025-02-28 01:58:17 +03:00
new simpler and much faster recovery code based on tdb transactions
(This used to be ctdb commit 9ef2268a1674b01f60c58fed72af8ac982fe77a3)
This commit is contained in:
parent
4f5b717aa3
commit
c08f2616cd
@ -879,10 +879,16 @@ int ctdb_control_recv(struct ctdb_context *ctdb,
|
||||
TALLOC_CTX *mem_ctx,
|
||||
TDB_DATA *outdata, int32_t *status, char **errormsg)
|
||||
{
|
||||
TALLOC_CTX *tmp_ctx;
|
||||
|
||||
if (state == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* prevent double free of state */
|
||||
tmp_ctx = talloc_new(ctdb);
|
||||
talloc_steal(tmp_ctx, state);
|
||||
|
||||
/* loop one event at a time until we either timeout or the control
|
||||
completes.
|
||||
*/
|
||||
@ -895,7 +901,7 @@ int ctdb_control_recv(struct ctdb_context *ctdb,
|
||||
if (state->async.fn) {
|
||||
state->async.fn(state);
|
||||
}
|
||||
talloc_free(state);
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -907,7 +913,7 @@ int ctdb_control_recv(struct ctdb_context *ctdb,
|
||||
if (state->async.fn) {
|
||||
state->async.fn(state);
|
||||
}
|
||||
talloc_free(state);
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -920,12 +926,11 @@ int ctdb_control_recv(struct ctdb_context *ctdb,
|
||||
*status = state->status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (state->async.fn) {
|
||||
state->async.fn(state);
|
||||
}
|
||||
talloc_free(state);
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1344,85 +1349,6 @@ int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
async send for pushdb
|
||||
*/
|
||||
struct ctdb_client_control_state *ctdb_ctrl_pushdb_send(
|
||||
struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
|
||||
TALLOC_CTX *mem_ctx, struct timeval timeout, TDB_DATA indata)
|
||||
{
|
||||
return ctdb_control_send(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_PUSH_DB, 0, indata,
|
||||
mem_ctx, NULL, &timeout, NULL);
|
||||
}
|
||||
|
||||
/*
  async receive for pushdb

  Waits for the control started by ctdb_ctrl_pushdb_send() and collapses
  the transport result and the remote status into a single 0 / -1 answer.
 */
int ctdb_ctrl_pushdb_recv(
	struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
	struct ctdb_client_control_state *state)
{
	int32_t remote_status;
	int rc;

	rc = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &remote_status, NULL);

	/* remote_status is only examined when the receive itself succeeded */
	if (rc == 0 && remote_status == 0) {
		return 0;
	}

	DEBUG(0,(__location__ " ctdb_ctrl_pushdb_recv failed\n"));
	return -1;
}
|
||||
|
||||
/*
|
||||
push all records to a specific database on a node
|
||||
*/
|
||||
int ctdb_ctrl_pushdb(struct ctdb_context *ctdb, uint32_t destnode,
|
||||
uint32_t dbid,
|
||||
TALLOC_CTX *mem_ctx, struct timeval timeout,
|
||||
TDB_DATA indata)
|
||||
{
|
||||
struct ctdb_client_control_state *state;
|
||||
|
||||
state = ctdb_ctrl_pushdb_send(ctdb, destnode, dbid, mem_ctx,
|
||||
timeout, indata);
|
||||
|
||||
return ctdb_ctrl_pushdb_recv(ctdb, mem_ctx, state);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
copy a tdb from one node to another node
|
||||
*/
|
||||
int ctdb_ctrl_copydb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t sourcenode,
|
||||
uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int ret;
|
||||
TDB_DATA outdata;
|
||||
|
||||
DEBUG(3,("pulling dbid 0x%x from %u\n", dbid, sourcenode));
|
||||
|
||||
ret = ctdb_ctrl_pulldb(ctdb, sourcenode, dbid, lmaster, mem_ctx,
|
||||
timeout, &outdata);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for pulldb failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(3,("pushing dbid 0x%x to %u\n", dbid, destnode));
|
||||
|
||||
ret = ctdb_ctrl_pushdb(ctdb, destnode, dbid, mem_ctx, timeout, outdata);
|
||||
talloc_free(outdata.dptr);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for pushdb failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(3,("copydb for dbid 0x%x done for %u to %u\n",
|
||||
dbid, sourcenode, destnode));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
change dmaster for all keys in the database to the new value
|
||||
*/
|
||||
@ -1993,133 +1919,6 @@ int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
get maximum rsn for a db on a node
|
||||
*/
|
||||
int ctdb_ctrl_get_max_rsn(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t *max_rsn)
|
||||
{
|
||||
TDB_DATA data, outdata;
|
||||
int ret;
|
||||
int32_t res;
|
||||
|
||||
data.dptr = (uint8_t *)&db_id;
|
||||
data.dsize = sizeof(db_id);
|
||||
|
||||
ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_MAX_RSN, 0, data, ctdb,
|
||||
&outdata, &res, &timeout, NULL);
|
||||
if (ret != 0 || res != 0 || outdata.dsize != sizeof(uint64_t)) {
|
||||
DEBUG(0,(__location__ " ctdb_control for get_max_rsn failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
*max_rsn = *(uint64_t *)outdata.dptr;
|
||||
talloc_free(outdata.dptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
set the rsn on non-empty records to the given rsn
|
||||
*/
|
||||
struct ctdb_client_control_state *ctdb_ctrl_set_rsn_nonempty_send(
|
||||
struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn)
|
||||
{
|
||||
TDB_DATA data;
|
||||
struct ctdb_control_set_rsn_nonempty p;
|
||||
|
||||
memset(&p, 0, sizeof(p));
|
||||
p.db_id = db_id;
|
||||
p.rsn = rsn;
|
||||
|
||||
data.dptr = (uint8_t *)&p;
|
||||
data.dsize = sizeof(p);
|
||||
|
||||
return ctdb_control_send(ctdb, destnode, 0, CTDB_CONTROL_SET_RSN_NONEMPTY, 0, data, mem_ctx,
|
||||
NULL, &timeout, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
set the rsn on non-empty records to the given rsn
|
||||
*/
|
||||
int ctdb_ctrl_set_rsn_nonempty_recv(struct ctdb_context *ctdb,
|
||||
struct ctdb_client_control_state *state)
|
||||
{
|
||||
int32_t res;
|
||||
int ret;
|
||||
|
||||
ret = ctdb_control_recv(ctdb, state, NULL, NULL, &res, NULL);
|
||||
if (ret != 0 || res != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for set_rsn_nonempty failed\n"));
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  synchronous version: set the rsn on non-empty records to the given rsn

  Sends the control (using ctdb as the memory context) and waits for the
  reply. Returns the result of ctdb_ctrl_set_rsn_nonempty_recv().
 */
int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout,
			       uint32_t destnode, uint32_t db_id, uint64_t rsn)
{
	struct ctdb_client_control_state *pending =
		ctdb_ctrl_set_rsn_nonempty_send(ctdb, ctdb, timeout, destnode, db_id, rsn);

	return ctdb_ctrl_set_rsn_nonempty_recv(ctdb, pending);
}
|
||||
|
||||
|
||||
/*
|
||||
set the rsn on non-empty records to the given rsn
|
||||
*/
|
||||
struct ctdb_client_control_state *ctdb_ctrl_delete_low_rsn_send(
|
||||
struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn)
|
||||
{
|
||||
TDB_DATA data;
|
||||
struct ctdb_control_delete_low_rsn p;
|
||||
|
||||
memset(&p, 0, sizeof(p));
|
||||
p.db_id = db_id;
|
||||
p.rsn = rsn;
|
||||
|
||||
data.dptr = (uint8_t *)&p;
|
||||
data.dsize = sizeof(p);
|
||||
|
||||
return ctdb_control_send(ctdb, destnode, 0, CTDB_CONTROL_DELETE_LOW_RSN, 0, data, mem_ctx,
|
||||
NULL, &timeout, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
set the rsn on non-empty records to the given rsn
|
||||
*/
|
||||
int ctdb_ctrl_delete_low_rsn_recv(struct ctdb_context *ctdb,
|
||||
struct ctdb_client_control_state *state)
|
||||
{
|
||||
int32_t res;
|
||||
int ret;
|
||||
|
||||
ret = ctdb_control_recv(ctdb, state, NULL, NULL, &res, NULL);
|
||||
if (ret != 0 || res != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for delete_low_rsn failed\n"));
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  synchronous version of the delete_low_rsn control

  Sends the control (using ctdb as the memory context) and waits for the
  reply. Returns the result of ctdb_ctrl_delete_low_rsn_recv().
 */
int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout,
			     uint32_t destnode, uint32_t db_id, uint64_t rsn)
{
	struct ctdb_client_control_state *pending =
		ctdb_ctrl_delete_low_rsn_send(ctdb, ctdb, timeout, destnode, db_id, rsn);

	return ctdb_ctrl_delete_low_rsn_recv(ctdb, pending);
}
|
||||
|
||||
|
||||
/*
|
||||
sent to a node to make it take over an ip address
|
||||
*/
|
||||
|
@ -454,9 +454,9 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
|
||||
CTDB_CONTROL_SHUTDOWN = 36,
|
||||
CTDB_CONTROL_GET_MONMODE = 37,
|
||||
/* #38 removed */
|
||||
CTDB_CONTROL_MAX_RSN = 39,
|
||||
CTDB_CONTROL_SET_RSN_NONEMPTY = 40,
|
||||
CTDB_CONTROL_DELETE_LOW_RSN = 41,
|
||||
/* #39 removed */
|
||||
/* #40 removed */
|
||||
/* #41 removed */
|
||||
CTDB_CONTROL_TAKEOVER_IP = 42,
|
||||
CTDB_CONTROL_RELEASE_IP = 43,
|
||||
CTDB_CONTROL_TCP_CLIENT = 44,
|
||||
@ -480,24 +480,11 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
|
||||
CTDB_CONTROL_PERSISTENT_STORE = 62,
|
||||
CTDB_CONTROL_UPDATE_RECORD = 63,
|
||||
CTDB_CONTROL_SEND_GRATIOUS_ARP = 64,
|
||||
CTDB_CONTROL_TRANSACTION_START = 65,
|
||||
CTDB_CONTROL_TRANSACTION_COMMIT = 66,
|
||||
CTDB_CONTROL_WIPE_DATABASE = 67,
|
||||
};
|
||||
|
||||
/*
  structure passed in ctdb_control_set_rsn_nonempty

  This is the raw payload of the CTDB_CONTROL_SET_RSN_NONEMPTY control;
  the dispatcher checks that the incoming data is exactly
  sizeof(struct ctdb_control_set_rsn_nonempty) bytes.

  NOTE(review): there may be implicit padding between db_id and rsn
  depending on the ABI's alignment of uint64_t, so the size checked on
  the wire may differ between architectures - confirm.
*/
|
||||
struct ctdb_control_set_rsn_nonempty {
|
||||
uint32_t db_id; /* id of the database to operate on */
|
||||
uint64_t rsn; /* rsn to write into the header of each non-empty record */
|
||||
};
|
||||
|
||||
/*
  structure passed in ctdb_control_delete_low_rsn

  This is the raw payload of the CTDB_CONTROL_DELETE_LOW_RSN control;
  the dispatcher checks that the incoming data is exactly
  sizeof(struct ctdb_control_delete_low_rsn) bytes.

  NOTE(review): there may be implicit padding between db_id and rsn
  depending on the ABI's alignment of uint64_t, so the size checked on
  the wire may differ between architectures - confirm.
*/
|
||||
struct ctdb_control_delete_low_rsn {
|
||||
uint32_t db_id; /* id of the database to operate on */
|
||||
uint64_t rsn; /* records whose header rsn is below this value are deleted */
|
||||
};
|
||||
|
||||
/*
|
||||
structure passed in set_call control
|
||||
*/
|
||||
@ -1058,32 +1045,6 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb);
|
||||
void ctdb_node_dead(struct ctdb_node *node);
|
||||
void ctdb_node_connected(struct ctdb_node *node);
|
||||
bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
|
||||
int32_t ctdb_control_max_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
|
||||
|
||||
struct ctdb_client_control_state *ctdb_ctrl_set_rsn_nonempty_send(
|
||||
struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
int ctdb_ctrl_set_rsn_nonempty_recv(struct ctdb_context *ctdb,
|
||||
struct ctdb_client_control_state *state);
|
||||
int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
struct ctdb_client_control_state *ctdb_ctrl_delete_low_rsn_send(
|
||||
struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
int ctdb_ctrl_delete_low_rsn_recv(struct ctdb_context *ctdb,
|
||||
struct ctdb_client_control_state *state);
|
||||
int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
|
||||
int32_t ctdb_control_set_rsn_nonempty(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
|
||||
int32_t ctdb_control_delete_low_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
|
||||
|
||||
int ctdb_ctrl_get_max_rsn(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t *max_rsn);
|
||||
int ctdb_ctrl_set_rsn_nonempty(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
int ctdb_ctrl_delete_low_rsn(struct ctdb_context *ctdb, struct timeval timeout,
|
||||
uint32_t destnode, uint32_t db_id, uint64_t rsn);
|
||||
void ctdb_set_scheduler(struct ctdb_context *ctdb);
|
||||
void ctdb_restore_scheduler(struct ctdb_context *ctdb);
|
||||
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
|
||||
@ -1216,5 +1177,8 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
|
||||
struct ctdb_req_control *c, TDB_DATA recdata,
|
||||
bool *async_reply);
|
||||
|
||||
int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb);
|
||||
int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb);
|
||||
int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata);
|
||||
|
||||
#endif
|
||||
|
@ -235,14 +235,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
DEBUG(0,("Received SHUTDOWN command. Stopping CTDB daemon.\n"));
|
||||
exit(0);
|
||||
|
||||
case CTDB_CONTROL_MAX_RSN:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
|
||||
return ctdb_control_max_rsn(ctdb, indata, outdata);
|
||||
|
||||
case CTDB_CONTROL_SET_RSN_NONEMPTY:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_set_rsn_nonempty));
|
||||
return ctdb_control_set_rsn_nonempty(ctdb, indata, outdata);
|
||||
|
||||
case CTDB_CONTROL_TAKEOVER_IP:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ip));
|
||||
return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
|
||||
@ -255,10 +247,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
CHECK_CONTROL_DATA_SIZE(0);
|
||||
return ctdb_control_get_public_ips(ctdb, c, outdata);
|
||||
|
||||
case CTDB_CONTROL_DELETE_LOW_RSN:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_delete_low_rsn));
|
||||
return ctdb_control_delete_low_rsn(ctdb, indata, outdata);
|
||||
|
||||
case CTDB_CONTROL_TCP_CLIENT:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp));
|
||||
return ctdb_control_tcp_client(ctdb, client_id, indata);
|
||||
@ -321,6 +309,16 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
case CTDB_CONTROL_SEND_GRATIOUS_ARP:
|
||||
return ctdb_control_send_gratious_arp(ctdb, indata);
|
||||
|
||||
case CTDB_CONTROL_TRANSACTION_START:
|
||||
return ctdb_control_transaction_start(ctdb);
|
||||
|
||||
case CTDB_CONTROL_TRANSACTION_COMMIT:
|
||||
return ctdb_control_transaction_commit(ctdb);
|
||||
|
||||
case CTDB_CONTROL_WIPE_DATABASE:
|
||||
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
|
||||
return ctdb_control_wipe_database(ctdb, indata);
|
||||
|
||||
default:
|
||||
DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
|
||||
return -1;
|
||||
|
@ -58,6 +58,7 @@ struct ctdb_freeze_handle {
|
||||
pid_t child;
|
||||
int fd;
|
||||
struct ctdb_freeze_waiter *waiters;
|
||||
bool transaction_started;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -249,12 +250,144 @@ bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
|
||||
*/
|
||||
int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
|
||||
{
|
||||
/* cancel any pending transactions */
|
||||
if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
|
||||
for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
|
||||
tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to cancel transaction for db '%s'\n",
|
||||
ctdb_db->db_name));
|
||||
}
|
||||
tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* this hack can be used to get a copy of the databases at the end of a recovery */
|
||||
system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
/* and this one for local testing */
|
||||
system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
|
||||
#endif
|
||||
|
||||
|
||||
talloc_free(ctdb->freeze_handle);
|
||||
ctdb->freeze_handle = NULL;
|
||||
ctdb_call_resend_all(ctdb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
start a transaction on all databases - used for recovery
|
||||
*/
|
||||
int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb)
|
||||
{
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,(__location__ " Failed transaction_start while not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
|
||||
int ret;
|
||||
|
||||
tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
|
||||
if (ctdb->freeze_handle->transaction_started) {
|
||||
if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to cancel transaction for db '%s'\n",
|
||||
ctdb_db->db_name));
|
||||
/* not a fatal error */
|
||||
}
|
||||
}
|
||||
|
||||
ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
|
||||
|
||||
tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Failed to start transaction for db '%s'\n",
|
||||
ctdb_db->db_name));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
ctdb->freeze_handle->transaction_started = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
commit transactions on all databases
|
||||
*/
|
||||
int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb)
|
||||
{
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,(__location__ " Failed transaction_start while not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!ctdb->freeze_handle->transaction_started) {
|
||||
DEBUG(0,(__location__ " transaction not started\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
|
||||
tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
if (tdb_transaction_commit(ctdb_db->ltdb->tdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to commit transaction for db '%s'\n",
|
||||
ctdb_db->db_name));
|
||||
/* this has to be fatal to maintain integrity - it should only
|
||||
happen if we run out of disk space */
|
||||
ctdb_fatal(ctdb, "Unable to commit transactions\n");
|
||||
return -1;
|
||||
}
|
||||
tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
|
||||
}
|
||||
|
||||
ctdb->freeze_handle->transaction_started = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
wipe a database - only possible when in a frozen transaction
|
||||
*/
|
||||
int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
{
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
uint32_t db_id = *(uint32_t *)indata.dptr;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,(__location__ " Failed transaction_start while not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!ctdb->freeze_handle->transaction_started) {
|
||||
DEBUG(0,(__location__ " transaction not started\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, db_id);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(0,(__location__ " Unknown db 0x%x\n", db_id));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to wipe database for db '%s'\n",
|
||||
ctdb_db->db_name));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -308,7 +308,7 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
|
||||
for (i=0;i<reply->count;i++) {
|
||||
TDB_DATA key, data;
|
||||
struct ctdb_ltdb_header *hdr, header;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
|
||||
key.dptr = &rec->data[0];
|
||||
key.dsize = rec->keylen;
|
||||
@ -323,24 +323,12 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
|
||||
data.dptr += sizeof(*hdr);
|
||||
data.dsize -= sizeof(*hdr);
|
||||
|
||||
ret = ctdb_ltdb_fetch(ctdb_db, key, &header, NULL, NULL);
|
||||
ret = ctdb_ltdb_store(ctdb_db, key, hdr, data);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to fetch record\n"));
|
||||
DEBUG(0, (__location__ " Unable to store record\n"));
|
||||
goto failed;
|
||||
}
|
||||
|
||||
/* The check for dmaster gives priority to the dmaster
|
||||
if the rsn values are equal */
|
||||
if (ctdb->pnn != ctdb->recovery_master ||
|
||||
header.rsn < hdr->rsn ||
|
||||
(header.dmaster != ctdb->recovery_master && header.rsn == hdr->rsn)) {
|
||||
ret = ctdb_ltdb_store(ctdb_db, key, hdr, data);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to store record\n"));
|
||||
goto failed;
|
||||
}
|
||||
}
|
||||
|
||||
rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
|
||||
}
|
||||
|
||||
@ -607,171 +595,6 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
callback for ctdb_control_max_rsn
|
||||
*/
|
||||
static int traverse_max_rsn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
|
||||
{
|
||||
struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
|
||||
uint64_t *max_rsn = (uint64_t *)p;
|
||||
|
||||
if (data.dsize >= sizeof(*h)) {
|
||||
(*max_rsn) = MAX(*max_rsn, h->rsn);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
get max rsn across an entire db
|
||||
*/
|
||||
int32_t ctdb_control_max_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
|
||||
{
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
uint32_t db_id = *(uint32_t *)indata.dptr;
|
||||
uint64_t max_rsn = 0;
|
||||
int ret;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,("rejecting ctdb_control_max_rsn when not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, db_id);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(0,(__location__ " Unknown db\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ctdb_lock_all_databases_mark(ctdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_max_rsn, &max_rsn);
|
||||
if (ret < 0) {
|
||||
DEBUG(0,(__location__ " traverse failed in ctdb_control_max_rsn\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_lock_all_databases_unmark(ctdb);
|
||||
|
||||
outdata->dptr = (uint8_t *)talloc(outdata, uint64_t);
|
||||
if (!outdata->dptr) {
|
||||
return -1;
|
||||
}
|
||||
(*(uint64_t *)outdata->dptr) = max_rsn;
|
||||
outdata->dsize = sizeof(uint64_t);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
callback for ctdb_control_set_rsn_nonempty
|
||||
*/
|
||||
static int traverse_set_rsn_nonempty(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
|
||||
{
|
||||
struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
|
||||
uint64_t *rsn = (uint64_t *)p;
|
||||
|
||||
if (data.dsize > sizeof(*h)) {
|
||||
h->rsn = *rsn;
|
||||
if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
set rsn for all non-empty records in a database to a given rsn
|
||||
*/
|
||||
int32_t ctdb_control_set_rsn_nonempty(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
|
||||
{
|
||||
struct ctdb_control_set_rsn_nonempty *p = (struct ctdb_control_set_rsn_nonempty *)indata.dptr;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
int ret;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,("rejecting ctdb_control_set_rsn_nonempty when not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, p->db_id);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(0,(__location__ " Unknown db\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ctdb_lock_all_databases_mark(ctdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = tdb_traverse(ctdb_db->ltdb->tdb, traverse_set_rsn_nonempty, &p->rsn);
|
||||
if (ret < 0) {
|
||||
DEBUG(0,(__location__ " traverse failed in ctdb_control_set_rsn_nonempty\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_lock_all_databases_unmark(ctdb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
callback for ctdb_control_delete_low_rsn
|
||||
*/
|
||||
static int traverse_delete_low_rsn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
|
||||
{
|
||||
struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
|
||||
uint64_t *rsn = (uint64_t *)p;
|
||||
|
||||
if (data.dsize < sizeof(*h) || h->rsn < *rsn) {
|
||||
if (tdb_delete(tdb, key) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
delete any records with a rsn < the given rsn
|
||||
*/
|
||||
int32_t ctdb_control_delete_low_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
|
||||
{
|
||||
struct ctdb_control_delete_low_rsn *p = (struct ctdb_control_delete_low_rsn *)indata.dptr;
|
||||
struct ctdb_db_context *ctdb_db;
|
||||
int ret;
|
||||
|
||||
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
|
||||
DEBUG(0,("rejecting ctdb_control_delete_low_rsn when not frozen\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_db = find_ctdb_db(ctdb, p->db_id);
|
||||
if (!ctdb_db) {
|
||||
DEBUG(0,(__location__ " Unknown db\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ctdb_lock_all_databases_mark(ctdb) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = tdb_traverse(ctdb_db->ltdb->tdb, traverse_delete_low_rsn, &p->rsn);
|
||||
if (ret < 0) {
|
||||
DEBUG(0,(__location__ " traverse failed in ctdb_control_delete_low_rsn\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
ctdb_lock_all_databases_unmark(ctdb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
try and get the recovery lock in shared storage - should only work
|
||||
@ -817,3 +640,5 @@ bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep)
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "cmdline.h"
|
||||
#include "../include/ctdb.h"
|
||||
#include "../include/ctdb_private.h"
|
||||
#include "db_wrap.h"
|
||||
|
||||
|
||||
struct ban_state {
|
||||
@ -63,6 +64,8 @@ struct async_data {
|
||||
static void async_callback(struct ctdb_client_control_state *state)
|
||||
{
|
||||
struct async_data *data = talloc_get_type(state->async.private_data, struct async_data);
|
||||
int ret;
|
||||
int32_t res;
|
||||
|
||||
/* one more node has responded with recmode data */
|
||||
data->count--;
|
||||
@ -73,6 +76,15 @@ static void async_callback(struct ctdb_client_control_state *state)
|
||||
if (state->state != CTDB_CONTROL_DONE) {
|
||||
DEBUG(0,("Async operation failed with state %d\n", state->state));
|
||||
data->fail_count++;
|
||||
return;
|
||||
}
|
||||
|
||||
state->async.fn = NULL;
|
||||
|
||||
ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
|
||||
if ((ret != 0) || (res != 0)) {
|
||||
DEBUG(0,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
|
||||
data->fail_count++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,88 +253,83 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t pnn, uint32_t ban_
|
||||
enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
|
||||
|
||||
|
||||
|
||||
|
||||
/* freeze all nodes */
|
||||
static enum monitor_result freeze_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
/*
|
||||
perform a simple control on all active nodes. The control cannot return data
|
||||
*/
|
||||
static int async_control_on_active_nodes(struct ctdb_context *ctdb, enum ctdb_controls opcode,
|
||||
struct ctdb_node_map *nodemap, TDB_DATA data, bool include_self)
|
||||
{
|
||||
struct async_data *async_data;
|
||||
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
|
||||
struct ctdb_client_control_state *state;
|
||||
int j;
|
||||
struct timeval timeout = CONTROL_TIMEOUT();
|
||||
|
||||
async_data = talloc_zero(mem_ctx, struct async_data);
|
||||
async_data = talloc_zero(ctdb, struct async_data);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
||||
|
||||
/* loop over all active nodes and send an async freeze call to
|
||||
them*/
|
||||
/* loop over all active nodes and send an async control to each of them */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
state = ctdb_ctrl_freeze_send(ctdb, mem_ctx,
|
||||
CONTROL_TIMEOUT(),
|
||||
nodemap->nodes[j].pnn);
|
||||
if (nodemap->nodes[j].pnn == ctdb->pnn && !include_self) {
|
||||
continue;
|
||||
}
|
||||
state = ctdb_control_send(ctdb, nodemap->nodes[j].pnn, 0, opcode,
|
||||
0, data, async_data, NULL, &timeout, NULL);
|
||||
if (state == NULL) {
|
||||
/* we failed to send the control, treat this as
|
||||
an error and try again next iteration
|
||||
*/
|
||||
DEBUG(0,("Failed to call ctdb_ctrl_freeze_send during recovery\n"));
|
||||
talloc_free(mem_ctx);
|
||||
return MONITOR_RECOVERY_NEEDED;
|
||||
DEBUG(0,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
|
||||
talloc_free(async_data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
async_add(async_data, state);
|
||||
}
|
||||
|
||||
if (async_wait(ctdb, async_data) != 0) {
|
||||
DEBUG(0,(__location__ " Failed async freeze call\n"));
|
||||
talloc_free(mem_ctx);
|
||||
return MONITOR_RECOVERY_NEEDED;
|
||||
DEBUG(0,(__location__ " Failed async control %u\n", (unsigned)opcode));
|
||||
talloc_free(async_data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
talloc_free(mem_ctx);
|
||||
return MONITOR_OK;
|
||||
talloc_free(async_data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
change recovery mode on all nodes
|
||||
*/
|
||||
static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t rec_mode)
|
||||
{
|
||||
int j, ret;
|
||||
TDB_DATA data;
|
||||
|
||||
/* freeze all nodes */
|
||||
if (rec_mode == CTDB_RECOVERY_ACTIVE) {
|
||||
ret = freeze_all_nodes(ctdb, nodemap);
|
||||
if (ret != MONITOR_OK) {
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_FREEZE,
|
||||
nodemap, tdb_null, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* set recovery mode to active on all nodes */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* dont change it for nodes that are unavailable */
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
data.dsize = sizeof(uint32_t);
|
||||
data.dptr = (unsigned char *)&rec_mode;
|
||||
|
||||
ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, rec_mode);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].pnn));
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMODE,
|
||||
nodemap, data, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (rec_mode == CTDB_RECOVERY_NORMAL) {
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_THAW,
|
||||
nodemap, tdb_null, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to thaw nodes. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (rec_mode == CTDB_RECOVERY_NORMAL) {
|
||||
ret = ctdb_ctrl_thaw(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].pnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -333,20 +340,15 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
|
||||
*/
|
||||
static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t pnn)
|
||||
{
|
||||
int j, ret;
|
||||
TDB_DATA data;
|
||||
|
||||
/* set recovery master to pnn on all nodes */
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* dont change it for nodes that are unavailable */
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
data.dsize = sizeof(uint32_t);
|
||||
data.dptr = (unsigned char *)&pnn;
|
||||
|
||||
ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, pnn);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].pnn));
|
||||
return -1;
|
||||
}
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_SET_RECMASTER,
|
||||
nodemap, data, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to set recmaster. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -483,59 +485,111 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb
|
||||
|
||||
|
||||
/*
|
||||
pull all the remote database contents into ours
|
||||
pull the remote database contents from one node into the recdb
|
||||
*/
|
||||
static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
|
||||
uint32_t pnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
|
||||
struct tdb_wrap *recdb, uint32_t dbid)
|
||||
{
|
||||
int i, j, ret;
|
||||
int ret;
|
||||
TDB_DATA outdata;
|
||||
struct ctdb_control_pulldb_reply *reply;
|
||||
struct ctdb_rec_data *rec;
|
||||
int i;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(recdb);
|
||||
|
||||
/* pull all records from all other nodes across onto this node
|
||||
(this merges based on rsn)
|
||||
*/
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* we dont need to merge with ourselves */
|
||||
if (nodemap->nodes[j].pnn == pnn) {
|
||||
continue;
|
||||
}
|
||||
/* dont merge from nodes that are unavailable */
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn,
|
||||
pnn, dbmap->dbs[i].dbid, CTDB_LMASTER_ANY, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n",
|
||||
nodemap->nodes[j].pnn, pnn));
|
||||
ret = ctdb_ctrl_pulldb(ctdb, srcnode, dbid, CTDB_LMASTER_ANY, tmp_ctx,
|
||||
CONTROL_TIMEOUT(), &outdata);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " Unable to copy db from node %u\n", srcnode));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
reply = (struct ctdb_control_pulldb_reply *)outdata.dptr;
|
||||
|
||||
if (outdata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
|
||||
DEBUG(0,(__location__ " invalid data in pulldb reply\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
rec = (struct ctdb_rec_data *)&reply->data[0];
|
||||
|
||||
for (i=0;
|
||||
i<reply->count;
|
||||
rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec), i++) {
|
||||
TDB_DATA key, data;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
TDB_DATA existing;
|
||||
|
||||
key.dptr = &rec->data[0];
|
||||
key.dsize = rec->keylen;
|
||||
data.dptr = &rec->data[key.dsize];
|
||||
data.dsize = rec->datalen;
|
||||
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
|
||||
if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
|
||||
DEBUG(0,(__location__ " bad ltdb record\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* fetch the existing record, if any */
|
||||
existing = tdb_fetch(recdb->tdb, key);
|
||||
|
||||
if (existing.dptr != NULL) {
|
||||
struct ctdb_ltdb_header header;
|
||||
if (existing.dsize < sizeof(struct ctdb_ltdb_header)) {
|
||||
DEBUG(0,(__location__ " Bad record size %u from node %u\n",
|
||||
existing.dsize, srcnode));
|
||||
free(existing.dptr);
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
header = *(struct ctdb_ltdb_header *)existing.dptr;
|
||||
free(existing.dptr);
|
||||
if (!(header.rsn < hdr->rsn ||
|
||||
(header.dmaster != ctdb->recovery_master && header.rsn == hdr->rsn))) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (tdb_store(recdb->tdb, key, data, TDB_REPLACE) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to store record\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
change the dmaster on all databases to point to us
|
||||
pull all the remote database contents into the recdb
|
||||
*/
|
||||
static int update_dmaster_on_our_databases(struct ctdb_context *ctdb, uint32_t pnn,
|
||||
struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
static int pull_remote_database(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
|
||||
struct tdb_wrap *recdb, uint32_t dbid)
|
||||
{
|
||||
int i, ret;
|
||||
int j;
|
||||
|
||||
/* update dmaster to point to this node for all databases/nodes */
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), pnn,
|
||||
ctdb, dbmap->dbs[i].dbid, pnn);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n",
|
||||
pnn, dbmap->dbs[i].dbid));
|
||||
/* pull all records from all other nodes across onto this node
|
||||
(this merges based on rsn)
|
||||
*/
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* dont merge from nodes that are unavailable */
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
if (pull_one_remote_database(ctdb, nodemap->nodes[j].pnn, recdb, dbid) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to pull remote database from node %u\n",
|
||||
nodemap->nodes[j].pnn));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -564,161 +618,6 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
vacuum one database
|
||||
*/
|
||||
static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node_map *nodemap)
|
||||
{
|
||||
uint64_t max_rsn;
|
||||
int ret, i;
|
||||
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
|
||||
struct async_data *async_data;
|
||||
struct ctdb_client_control_state *state;
|
||||
|
||||
/* find max rsn on our local node for this db */
|
||||
ret = ctdb_ctrl_get_max_rsn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, db_id, &max_rsn);
|
||||
if (ret != 0) {
|
||||
talloc_free(mem_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
async_data = talloc_zero(mem_ctx, struct async_data);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
||||
|
||||
/* set rsn on non-empty records to max_rsn+1 */
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
state = ctdb_ctrl_set_rsn_nonempty_send(ctdb, async_data, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn,
|
||||
db_id, max_rsn+1);
|
||||
if (state == NULL) {
|
||||
DEBUG(0,(__location__ " Failed to set rsn on node %u to %llu\n",
|
||||
nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1));
|
||||
talloc_free(mem_ctx);
|
||||
return -1;
|
||||
}
|
||||
async_add(async_data, state);
|
||||
}
|
||||
|
||||
if (async_wait(ctdb, async_data) != 0) {
|
||||
DEBUG(0,(__location__ " Failed async calls to set rsn nonempty\n"));
|
||||
talloc_free(mem_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* delete records with rsn < max_rsn+1 on all nodes */
|
||||
for (i=0;i<nodemap->num;i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
state = ctdb_ctrl_delete_low_rsn_send(ctdb, async_data, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn,
|
||||
db_id, max_rsn+1);
|
||||
if (state == NULL) {
|
||||
DEBUG(0,(__location__ " Failed to delete records on node %u with rsn below %llu\n",
|
||||
nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1));
|
||||
talloc_free(mem_ctx);
|
||||
return -1;
|
||||
}
|
||||
async_add(async_data, state);
|
||||
}
|
||||
|
||||
if (async_wait(ctdb, async_data) != 0) {
|
||||
DEBUG(0,(__location__ " Failed async calls to delete low rsn\n"));
|
||||
talloc_free(mem_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
vacuum all attached databases
|
||||
*/
|
||||
static int vacuum_all_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
|
||||
struct ctdb_dbid_map *dbmap)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* update dmaster to point to this node for all databases/nodes */
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
if (vacuum_db(ctdb, dbmap->dbs[i].dbid, nodemap) != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
push out all our database contents to all other nodes
|
||||
*/
|
||||
static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
|
||||
uint32_t pnn, struct ctdb_dbid_map *dbmap, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* push all records out to the nodes again */
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
int j, ret;
|
||||
TDB_DATA outdata;
|
||||
struct async_data *async_data;
|
||||
struct ctdb_client_control_state *state;
|
||||
|
||||
DEBUG(3,("pulling dbid 0x%x from local node %u\n",
|
||||
dbmap->dbs[i].dbid, pnn));
|
||||
|
||||
async_data = talloc_zero(mem_ctx, struct async_data);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
||||
|
||||
ret = ctdb_ctrl_pulldb(ctdb, pnn, dbmap->dbs[i].dbid,
|
||||
CTDB_LMASTER_ANY,
|
||||
async_data, CONTROL_TIMEOUT(), &outdata);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,(__location__ " ctdb_control for pulldb failed\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (j=0; j<nodemap->num; j++) {
|
||||
/* we dont need to push to ourselves */
|
||||
if (nodemap->nodes[j].pnn == pnn) {
|
||||
continue;
|
||||
}
|
||||
/* dont push to nodes that are unavailable */
|
||||
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
|
||||
DEBUG(3,("starting async push of dbid 0x%x to %u\n",
|
||||
dbmap->dbs[i].dbid,
|
||||
nodemap->nodes[j].pnn));
|
||||
|
||||
state = ctdb_ctrl_pushdb_send(ctdb,
|
||||
nodemap->nodes[j].pnn,
|
||||
dbmap->dbs[i].dbid, async_data,
|
||||
CONTROL_TIMEOUT(), outdata);
|
||||
if (state == NULL) {
|
||||
DEBUG(0,(__location__ " async control for pushdb for dbid 0x%08x to node %u failed\n", dbmap->dbs[i].dbid, nodemap->nodes[j].pnn));
|
||||
talloc_free(async_data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
async_add(async_data, state);
|
||||
}
|
||||
|
||||
if (async_wait(ctdb, async_data) != 0) {
|
||||
DEBUG(0,("Async push of database 0x%08x failed\n", dbmap->dbs[i].dbid));
|
||||
talloc_free(async_data);
|
||||
return -1;
|
||||
}
|
||||
|
||||
talloc_free(async_data);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
ensure all nodes have the same vnnmap we do
|
||||
@ -965,6 +864,169 @@ static uint32_t new_generation(void)
|
||||
return generation;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
create a temporary working database
|
||||
*/
|
||||
static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx)
|
||||
{
|
||||
char *name;
|
||||
struct tdb_wrap *recdb;
|
||||
|
||||
/* open up the temporary recovery database */
|
||||
name = talloc_asprintf(mem_ctx, "%s/recdb.tdb", ctdb->db_directory);
|
||||
if (name == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
unlink(name);
|
||||
recdb = tdb_wrap_open(mem_ctx, name, ctdb->tunable.database_hash_size,
|
||||
TDB_NOLOCK, O_RDWR|O_CREAT|O_EXCL, 0600);
|
||||
if (recdb == NULL) {
|
||||
DEBUG(0,(__location__ " Failed to create temp recovery database '%s'\n", name));
|
||||
}
|
||||
|
||||
talloc_free(name);
|
||||
|
||||
return recdb;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
a traverse function for pulling all relevent records from recdb
|
||||
*/
|
||||
struct recdb_data {
|
||||
struct ctdb_context *ctdb;
|
||||
struct ctdb_control_pulldb_reply *recdata;
|
||||
uint32_t len;
|
||||
};
|
||||
|
||||
static int traverse_recdb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
|
||||
{
|
||||
struct recdb_data *params = (struct recdb_data *)p;
|
||||
struct ctdb_rec_data *rec;
|
||||
struct ctdb_ltdb_header *hdr;
|
||||
|
||||
/* skip empty records */
|
||||
if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* update the dmaster field to point to us */
|
||||
hdr = (struct ctdb_ltdb_header *)data.dptr;
|
||||
hdr->dmaster = params->ctdb->pnn;
|
||||
|
||||
/* add the record to the blob ready to send to the nodes */
|
||||
rec = ctdb_marshall_record(params->recdata, 0, key, NULL, data);
|
||||
params->recdata = talloc_realloc_size(NULL, params->recdata, rec->length + params->len);
|
||||
if (params->recdata == NULL) {
|
||||
DEBUG(0,(__location__ " Failed to expand recdata to %u (%u records)\n",
|
||||
rec->length + params->len, params->recdata->count));
|
||||
return -1;
|
||||
}
|
||||
params->recdata->count++;
|
||||
memcpy(params->len+(uint8_t *)params->recdata, rec, rec->length);
|
||||
params->len += rec->length;
|
||||
talloc_free(rec);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
push the recdb database out to all nodes
|
||||
*/
|
||||
static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
|
||||
struct tdb_wrap *recdb, struct ctdb_node_map *nodemap)
|
||||
{
|
||||
struct recdb_data params;
|
||||
struct ctdb_control_pulldb_reply *recdata;
|
||||
TDB_DATA outdata;
|
||||
|
||||
recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply);
|
||||
CTDB_NO_MEMORY(ctdb, recdata);
|
||||
|
||||
recdata->db_id = dbid;
|
||||
|
||||
params.ctdb = ctdb;
|
||||
params.recdata = recdata;
|
||||
params.len = offsetof(struct ctdb_control_pulldb_reply, data);
|
||||
|
||||
if (tdb_traverse_read(recdb->tdb, traverse_recdb, ¶ms) == -1) {
|
||||
DEBUG(0,(__location__ " Failed to traverse recdb database\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
recdata = params.recdata;
|
||||
|
||||
outdata.dptr = (void *)recdata;
|
||||
outdata.dsize = params.len;
|
||||
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_PUSH_DB, nodemap, outdata, true) != 0) {
|
||||
DEBUG(0,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid));
|
||||
talloc_free(recdata);
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - pushed remote database 0x%x of size %u\n",
|
||||
dbid, recdata->count));
|
||||
|
||||
talloc_free(recdata);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
go through a full recovery on one database
|
||||
*/
|
||||
static int recover_database(struct ctdb_recoverd *rec,
|
||||
TALLOC_CTX *mem_ctx,
|
||||
uint32_t dbid,
|
||||
uint32_t pnn,
|
||||
struct ctdb_node_map *nodemap)
|
||||
{
|
||||
struct tdb_wrap *recdb;
|
||||
int ret;
|
||||
struct ctdb_context *ctdb = rec->ctdb;
|
||||
TDB_DATA data;
|
||||
|
||||
recdb = create_recdb(ctdb, mem_ctx);
|
||||
if (recdb == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* pull all remote databases onto the recdb */
|
||||
ret = pull_remote_database(ctdb, nodemap, recdb, dbid);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to pull remote database 0x%x\n", dbid));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - pulled remote database 0x%x\n", dbid));
|
||||
|
||||
/* wipe all the remote databases. This is safe as we are in a transaction */
|
||||
data.dptr = (void *)&dbid;
|
||||
data.dsize = sizeof(uint32_t);
|
||||
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_WIPE_DATABASE,
|
||||
nodemap, data, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to wipe database. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* push out the correct database. This sets the dmaster and skips
|
||||
the empty records */
|
||||
ret = push_recdb_database(ctdb, dbid, recdb, nodemap);
|
||||
if (ret != 0) {
|
||||
talloc_free(recdb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* all done with this database */
|
||||
talloc_free(recdb);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
we are the recmaster, and recovery is needed - start a recovery run
|
||||
@ -999,6 +1061,34 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
|
||||
|
||||
/* get a list of all databases */
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to get dbids from node :%u\n", pnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* we do the db creation before we set the recovery mode, so the freeze happens
|
||||
on all databases we will be dealing with. */
|
||||
|
||||
/* verify that we have all the databases any other node has */
|
||||
ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to create missing local databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - created remote databases\n"));
|
||||
|
||||
/* set recovery mode to active on all nodes */
|
||||
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_ACTIVE);
|
||||
if (ret!=0) {
|
||||
@ -1006,8 +1096,6 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery initiated due to problem with node %u\n", culprit));
|
||||
|
||||
/* pick a new generation number */
|
||||
generation = new_generation();
|
||||
|
||||
@ -1028,67 +1116,32 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* get a list of all databases */
|
||||
ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), pnn, mem_ctx, &dbmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to get dbids from node :%u\n", pnn));
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_START,
|
||||
nodemap, tdb_null, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to start transactions. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0,(__location__ " started transactions on all nodes\n"));
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
if (recover_database(rec, mem_ctx, dbmap->dbs[i].dbid, pnn, nodemap) != 0) {
|
||||
DEBUG(0, (__location__ " Failed to recover database 0x%x\n", dbmap->dbs[i].dbid));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - starting database commits\n"));
|
||||
|
||||
/* commit all the changes */
|
||||
if (async_control_on_active_nodes(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
|
||||
nodemap, tdb_null, true) != 0) {
|
||||
DEBUG(0, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* verify that we have all the databases any other node has */
|
||||
ret = create_missing_local_databases(ctdb, nodemap, pnn, &dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to create missing local databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* verify that all other nodes have all our databases */
|
||||
ret = create_missing_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to create missing remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - created remote databases\n"));
|
||||
|
||||
/* pull all remote databases onto the local node */
|
||||
ret = pull_all_remote_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to pull remote databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - pulled remote databases\n"));
|
||||
|
||||
/* repoint all local database records to the local node as
|
||||
being dmaster
|
||||
*/
|
||||
ret = update_dmaster_on_our_databases(ctdb, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to update dmaster on all databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - updated dmaster on our databases\n"));
|
||||
|
||||
|
||||
/* push all local databases to the remote nodes */
|
||||
ret = push_all_local_databases(ctdb, nodemap, pnn, dbmap, mem_ctx);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to push local databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - pushed remote databases\n"));
|
||||
DEBUG(0, (__location__ " Recovery - committed databases\n"));
|
||||
|
||||
|
||||
/* build a new vnn map with all the currently active and
|
||||
unbanned nodes */
|
||||
@ -1133,17 +1186,6 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - updated flags\n"));
|
||||
|
||||
/*
|
||||
run a vacuum operation on empty records
|
||||
*/
|
||||
ret = vacuum_all_databases(ctdb, nodemap, dbmap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0, (__location__ " Unable to vacuum all databases\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
DEBUG(0, (__location__ " Recovery - vacuumed all databases\n"));
|
||||
|
||||
/*
|
||||
if enabled, tell nodes to takeover their public IPs
|
||||
*/
|
||||
@ -1157,10 +1199,6 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
DEBUG(1, (__location__ " Recovery - done takeover\n"));
|
||||
}
|
||||
|
||||
for (i=0;i<dbmap->num;i++) {
|
||||
DEBUG(2,("Recovered database with db_id 0x%08x\n", dbmap->dbs[i].dbid));
|
||||
}
|
||||
|
||||
/* disable recovery mode */
|
||||
ret = set_recovery_mode(ctdb, nodemap, CTDB_RECOVERY_NORMAL);
|
||||
if (ret!=0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user