mirror of
https://github.com/samba-team/samba.git
synced 2024-12-24 21:34:56 +03:00
Merge branch 'master' of git://git.samba.org/sahlberg/ctdb
(This used to be ctdb commit cb2c05d5d3f8908eecdad1ae6a1dc8efa1ffcb1e)
This commit is contained in:
commit
e8a62cdca4
@ -1839,19 +1839,33 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
|
||||
*/
|
||||
/*
  Dump one database record to the stdio stream passed in p.

  ctdb - context; only used here as the talloc parent for the hex strings
  key  - record key
  data - record data; its first sizeof(struct ctdb_ltdb_header) bytes are
         read as the ltdb header, the remainder is the payload
  p    - FILE * to write to

  Always returns 0.

  NOTE(review): this listing comes from a diff view and appears to
  interleave the older hex-encoded output with the newer escaped
  key(...)/data(...) output - confirm which lines belong to the final
  function.
  NOTE(review): presumably used as a ctdb_traverse() callback - confirm
  against the caller.
 */
static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
|
||||
{
|
||||
int i;
|
||||
FILE *f = (FILE *)p;
|
||||
char *keystr, *datastr;
|
||||
/* the record data starts with the ltdb header */
struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
|
||||
|
||||
keystr = hex_encode_talloc(ctdb, key.dptr, key.dsize);
|
||||
/* NOTE(review): data.dsize is not checked against sizeof(*h) before the
   subtraction - confirm callers never pass records shorter than a header */
datastr = hex_encode_talloc(ctdb, data.dptr+sizeof(*h), data.dsize-sizeof(*h));
|
||||
|
||||
fprintf(f, "dmaster: %u\n", h->dmaster);
|
||||
fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
|
||||
fprintf(f, "key: %s\ndata: %s\n", keystr, datastr);
|
||||
|
||||
talloc_free(keystr);
|
||||
talloc_free(datastr);
|
||||
/* NOTE(review): dsize is printed with %d - confirm its type matches */
fprintf(f, "key(%d) = \"", key.dsize);
|
||||
/* key bytes: written verbatim when isascii() accepts them, otherwise as
   a backslash followed by two upper-case hex digits */
for (i=0;i<key.dsize;i++) {
|
||||
/* NOTE(review): isascii() also accepts control characters, so
   non-printable bytes below 0x80 are written raw */
if (isascii(key.dptr[i])) {
|
||||
fprintf(f, "%c", key.dptr[i]);
|
||||
} else {
|
||||
fprintf(f, "\\%02X", key.dptr[i]);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\"\n");
|
||||
|
||||
/* the length printed here is the full data.dsize, header included */
fprintf(f, "data(%d) = \"", data.dsize);
|
||||
/* payload bytes only: start right after the ltdb header */
for (i=sizeof(*h);i<data.dsize;i++) {
|
||||
if (isascii(data.dptr[i])) {
|
||||
fprintf(f, "%c", data.dptr[i]);
|
||||
} else {
|
||||
fprintf(f, "\\%02X", data.dptr[i]);
|
||||
}
|
||||
}
|
||||
fprintf(f, "\"\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2657,8 +2671,11 @@ int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, ui
|
||||
static void async_callback(struct ctdb_client_control_state *state)
|
||||
{
|
||||
struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
|
||||
struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
|
||||
int ret;
|
||||
TDB_DATA outdata;
|
||||
int32_t res;
|
||||
uint32_t destnode = state->c->hdr.destnode;
|
||||
|
||||
/* one more node has responded with recmode data */
|
||||
data->count--;
|
||||
@ -2676,13 +2693,16 @@ static void async_callback(struct ctdb_client_control_state *state)
|
||||
|
||||
state->async.fn = NULL;
|
||||
|
||||
ret = ctdb_control_recv(state->ctdb, state, data, NULL, &res, NULL);
|
||||
ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
|
||||
if ((ret != 0) || (res != 0)) {
|
||||
if ( !data->dont_log_errors) {
|
||||
DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d\n", ret, (int)res));
|
||||
}
|
||||
data->fail_count++;
|
||||
}
|
||||
if ((ret == 0) && (data->callback != NULL)) {
|
||||
data->callback(ctdb, destnode, res, outdata);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -2725,15 +2745,17 @@ int ctdb_client_async_control(struct ctdb_context *ctdb,
|
||||
uint32_t *nodes,
|
||||
struct timeval timeout,
|
||||
bool dont_log_errors,
|
||||
TDB_DATA data)
|
||||
TDB_DATA data,
|
||||
client_async_callback client_callback)
|
||||
{
|
||||
struct client_async_data *async_data;
|
||||
struct ctdb_client_control_state *state;
|
||||
int j, num_nodes;
|
||||
|
||||
|
||||
async_data = talloc_zero(ctdb, struct client_async_data);
|
||||
CTDB_NO_MEMORY_FATAL(ctdb, async_data);
|
||||
async_data->dont_log_errors = dont_log_errors;
|
||||
async_data->callback = client_callback;
|
||||
|
||||
num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
|
||||
|
||||
@ -2857,3 +2879,44 @@ ctdb_read_pnn_lock(int fd, int32_t pnn)
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
get capabilities of a remote node
|
||||
*/
|
||||
struct ctdb_client_control_state *
|
||||
ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
|
||||
{
|
||||
return ctdb_control_send(ctdb, destnode, 0,
|
||||
CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
|
||||
mem_ctx, &timeout, NULL);
|
||||
}
|
||||
|
||||
/*
  get capabilities of a remote node (receive half)

  ctdb         - client context
  mem_ctx      - talloc context handed to ctdb_control_recv() for the reply
  state        - request state returned by ctdb_ctrl_getcapabilities_send()
  capabilities - optional out parameter; when non-NULL it receives the
                 capability bitmask reported by the node

  Returns 0 on success. Returns -1 if the control failed, if the remote
  side reported a non-zero status, or if the reply is not exactly one
  uint32_t when the caller asked for the value.
 */
int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
{
	int ret;
	int32_t res;
	TDB_DATA outdata;

	ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
	if ( (ret != 0) || (res != 0) ) {
		DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
		return -1;
	}

	if (capabilities) {
		/* never read a uint32_t out of a short or missing reply;
		   same validation as the async getcap callback */
		if ( (outdata.dsize != sizeof(uint32_t)) || (outdata.dptr == NULL) ) {
			DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv got invalid reply of size %u\n", (unsigned)outdata.dsize));
			return -1;
		}
		*capabilities = *((uint32_t *)outdata.dptr);
	}

	return 0;
}
|
||||
|
||||
int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
|
||||
{
|
||||
struct ctdb_client_control_state *state;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(NULL);
|
||||
int ret;
|
||||
|
||||
state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
|
||||
ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
@ -66,6 +66,12 @@ CTDB_OPTIONS="$CTDB_OPTIONS --reclock=$CTDB_RECOVERY_LOCK"
|
||||
[ -z "$CTDB_START_AS_DISABLED" ] || [ "$CTDB_START_AS_DISABLED" != "yes" ] || {
|
||||
CTDB_OPTIONS="$CTDB_OPTIONS --start-as-disabled"
|
||||
}
|
||||
[ -z "$CTDB_CAPABILITY_RECMASTER" ] || [ "$CTDB_CAPABILITY_RECMASTER" != "no" ] || {
|
||||
CTDB_OPTIONS="$CTDB_OPTIONS --no-recmaster"
|
||||
}
|
||||
[ -z "$CTDB_CAPABILITY_LMASTER" ] || [ "$CTDB_CAPABILITY_LMASTER" != "no" ] || {
|
||||
CTDB_OPTIONS="$CTDB_OPTIONS --no-lmaster"
|
||||
}
|
||||
|
||||
if [ -x /sbin/startproc ]; then
|
||||
init_style="suse"
|
||||
|
@ -91,6 +91,23 @@
|
||||
# the node with "ctdb enable"
|
||||
# CTDB_START_AS_DISABLED="yes"
|
||||
|
||||
# LMASTER and RECMASTER capabilities.
|
||||
# By default all nodes are capable of both being LMASTER for records and
|
||||
# also of taking the RECMASTER role and performing recovery.
|
||||
# These parameters can be used to disable these two roles on a node.
|
||||
# Note: If there are NO available nodes left in a cluster that can perform
|
||||
# the RECMASTER role, the cluster will not be able to recover from a failure
|
||||
# and will remain in RECOVERY mode until a RECMASTER capable node becomes
|
||||
# available. Same for LMASTER.
|
||||
# These parameters are useful for scenarios where you have one "remote" node
|
||||
# in a cluster and you do not want the remote node to be fully participating
|
||||
# in the cluster and slow things down.
|
||||
# For that case, set both roles to "no" for the remote node on the remote site
|
||||
# but leave the roles default to "yes" on the primary nodes in the central
|
||||
# datacentre.
|
||||
# CTDB_CAPABILITY_RECMASTER=yes
|
||||
# CTDB_CAPABILITY_LMASTER=yes
|
||||
|
||||
# where to log messages
|
||||
# the default is /var/log/log.ctdb
|
||||
# CTDB_LOGFILE=/var/log/log.ctdb
|
||||
|
@ -536,4 +536,13 @@ uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
|
||||
|
||||
int ctdb_read_pnn_lock(int fd, int32_t pnn);
|
||||
|
||||
/*
|
||||
get capabilities of a remote node
|
||||
*/
|
||||
int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities);
|
||||
|
||||
struct ctdb_client_control_state *ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode);
|
||||
|
||||
int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities);
|
||||
|
||||
#endif
|
||||
|
@ -199,6 +199,11 @@ struct ctdb_node {
|
||||
uint32_t rx_cnt;
|
||||
uint32_t tx_cnt;
|
||||
|
||||
/* used to track node capabilities, is only valid/tracked inside the
|
||||
recovery daemon.
|
||||
*/
|
||||
uint32_t capabilities;
|
||||
|
||||
/* a list of controls pending to this node, so we can time them out quickly
|
||||
if the node becomes disconnected */
|
||||
struct daemon_control_state *pending_controls;
|
||||
@ -332,6 +337,10 @@ enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN
|
||||
#define CTDB_MONITORING_ACTIVE 0
|
||||
#define CTDB_MONITORING_DISABLED 1
|
||||
|
||||
/* The different capabilities of the ctdb daemon. */
|
||||
#define CTDB_CAP_RECMASTER 0x00000001
|
||||
#define CTDB_CAP_LMASTER 0x00000002
|
||||
|
||||
/* main state of the ctdb daemon */
|
||||
struct ctdb_context {
|
||||
struct event_context *ev;
|
||||
@ -356,6 +365,7 @@ struct ctdb_context {
|
||||
uint32_t num_nodes;
|
||||
uint32_t num_connected;
|
||||
unsigned flags;
|
||||
uint32_t capabilities;
|
||||
struct idr_context *idr;
|
||||
uint16_t idr_cnt;
|
||||
struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */
|
||||
@ -513,6 +523,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
|
||||
CTDB_CONTROL_ADD_PUBLIC_IP = 77,
|
||||
CTDB_CONTROL_DEL_PUBLIC_IP = 78,
|
||||
CTDB_CONTROL_RUN_EVENTSCRIPTS = 79,
|
||||
CTDB_CONTROL_GET_CAPABILITIES = 80,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1270,10 +1281,13 @@ int32_t ctdb_monitoring_mode(struct ctdb_context *ctdb);
|
||||
int ctdb_set_child_logging(struct ctdb_context *ctdb);
|
||||
|
||||
|
||||
/* Per-node completion hook for ctdb_client_async_control().
   async_callback() invokes it with the context, the pnn of the node that
   answered, the status reported by that node and the data it returned. */
typedef void (*client_async_callback)(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata);
|
||||
|
||||
/* Bookkeeping shared by a batch of parallel async controls */
struct client_async_data {
|
||||
/* when set, async_callback() does not log failed operations */
bool dont_log_errors;
|
||||
/* decremented by async_callback() each time a node has responded */
uint32_t count;
|
||||
/* incremented by async_callback() for every failed control
   (receive error or non-zero status from the node) */
uint32_t fail_count;
|
||||
/* optional; called by async_callback() when the reply was received
   successfully; may be NULL */
client_async_callback callback;
|
||||
};
|
||||
void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state);
|
||||
int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data);
|
||||
@ -1282,12 +1296,14 @@ int ctdb_client_async_control(struct ctdb_context *ctdb,
|
||||
uint32_t *nodes,
|
||||
struct timeval timeout,
|
||||
bool dont_log_errors,
|
||||
TDB_DATA data);
|
||||
TDB_DATA data,
|
||||
client_async_callback client_callback);
|
||||
|
||||
void ctdb_load_nodes_file(struct ctdb_context *ctdb);
|
||||
|
||||
int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode);
|
||||
|
||||
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata);
|
||||
int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata);
|
||||
|
||||
#endif
|
||||
|
@ -5,7 +5,7 @@ Vendor: Samba Team
|
||||
Packager: Samba Team <samba@samba.org>
|
||||
Name: ctdb
|
||||
Version: 1.0
|
||||
Release: 33
|
||||
Release: 35
|
||||
Epoch: 0
|
||||
License: GNU GPL version 3
|
||||
Group: System Environment/Daemons
|
||||
@ -120,6 +120,36 @@ fi
|
||||
%{_includedir}/ctdb_private.h
|
||||
|
||||
%changelog
|
||||
* Wed May 7 2008 : Version 1.0.35
|
||||
- During recovery, when we define the new set of lmasters (vnnmap)
|
||||
only consider those nodes that have the can-be-lmaster capability
|
||||
when we create the vnnmap. unless there are no nodes available which
|
||||
supports this capability in which case we allow the recmaster to
|
||||
become lmaster capable (temporarily).
|
||||
- Extend the async framework so that we can use parallel async calls
|
||||
to controls that return data.
|
||||
- If we do not have the "can be recmaster" capability, make sure we will
|
||||
lose any recmaster elections, unless there are no nodes available that
|
||||
have the capability, in which case we "take/win" the election anyway.
|
||||
- Close and reopen the reclock pnn file at regular intervals.
|
||||
Make it a non-fatal event if we occasionally fail to open/read/write
|
||||
to this file.
|
||||
- Monitor that the recovery daemon is still running from the main ctdb
|
||||
daemon and shutdown the main daemon when recovery daemon has terminated.
|
||||
- Add a "ctdb getcapabilities" command to read the capabilities off a node.
|
||||
- Define two new capabilities : can be recmaster and can be lmaster
|
||||
and default both capabilities to YES.
|
||||
- Log denied tcp connection attempts with DEBUG_ERR and not DEBUG_WARNING
|
||||
* Thu Apr 24 2008 : Version 1.0.34
|
||||
- When deleting a public ip from a node, try to migrate the ip to a different
|
||||
node first.
|
||||
- Change catdb to produce output similar to tdbdump
|
||||
- When adding a new public ip address, if this ip does not exist yet in
|
||||
the cluster, then grab the ip on the local node and activate it.
|
||||
- When a node disagrees with the recmaster on WHO is the recmaster, then
|
||||
mark that node as a recovery culprit so it will eventually become
|
||||
banned.
|
||||
- Make ctdb eventscript support the -n all argument.
|
||||
* Thu Apr 10 2008 : Version 1.0.33
|
||||
- Add facilities to include site local adaptations to the eventscript
|
||||
by /etc/ctdb/rc.local which will be read by all eventscripts.
|
||||
|
@ -389,6 +389,9 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
|
||||
case CTDB_CONTROL_DEL_PUBLIC_IP:
|
||||
return ctdb_control_del_public_address(ctdb, indata);
|
||||
|
||||
case CTDB_CONTROL_GET_CAPABILITIES:
|
||||
return ctdb_control_get_capabilities(ctdb, outdata);
|
||||
|
||||
default:
|
||||
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
|
||||
return -1;
|
||||
|
@ -957,3 +957,21 @@ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA inda
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
report capabilities
|
||||
*/
|
||||
int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata)
|
||||
{
|
||||
uint32_t *capabilities = NULL;
|
||||
|
||||
capabilities = talloc(outdata, uint32_t);
|
||||
CTDB_NO_MEMORY(ctdb, capabilities);
|
||||
*capabilities = ctdb->capabilities;
|
||||
|
||||
outdata->dsize = sizeof(uint32_t);
|
||||
outdata->dptr = (uint8_t *)capabilities;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -212,7 +212,7 @@ static int run_recovered_eventscript(struct ctdb_context *ctdb, struct ctdb_node
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_END_RECOVERY,
|
||||
list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, tdb_null) != 0) {
|
||||
CONTROL_TIMEOUT(), false, tdb_null, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'recovered' event. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -234,7 +234,7 @@ static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_START_RECOVERY,
|
||||
list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, tdb_null) != 0) {
|
||||
CONTROL_TIMEOUT(), false, tdb_null, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to run the 'startrecovery' event. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -244,6 +244,40 @@ static int run_startrecovery_eventscript(struct ctdb_context *ctdb, struct ctdb_
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  Per-node callback for the parallel CTDB_CONTROL_GET_CAPABILITIES call
  issued by update_capabilities().

  node_pnn - node that answered
  res      - status reported by that node
  outdata  - reply payload, expected to be exactly one uint32_t

  On a valid reply the value is stored in ctdb->nodes[node_pnn]->capabilities.
  Failed controls, malformed replies and out-of-range node numbers leave
  the stored capabilities untouched.
 */
static void async_getcap_callback(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t res, TDB_DATA outdata)
{
	/* the async framework calls us even when the remote side reported
	   an error; it has already accounted for that failure */
	if (res != 0) {
		return;
	}

	if ( (outdata.dsize != sizeof(uint32_t)) || (outdata.dptr == NULL) ) {
		DEBUG(DEBUG_ERR, (__location__ " Invalid length/pointer for getcap callback : %u %p\n", (unsigned)outdata.dsize, (void *)outdata.dptr));
		return;
	}

	/* node_pnn comes from the reply; never index past the node array */
	if (node_pnn >= ctdb->num_nodes) {
		DEBUG(DEBUG_ERR, (__location__ " Invalid pnn %u for getcap callback\n", (unsigned)node_pnn));
		return;
	}

	ctdb->nodes[node_pnn]->capabilities = *((uint32_t *)outdata.dptr);
}
|
||||
|
||||
/*
|
||||
update the node capabilities for all connected nodes
|
||||
*/
|
||||
static int update_capabilities(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
|
||||
{
|
||||
uint32_t *nodes;
|
||||
TALLOC_CTX *tmp_ctx;
|
||||
|
||||
tmp_ctx = talloc_new(ctdb);
|
||||
CTDB_NO_MEMORY(ctdb, tmp_ctx);
|
||||
|
||||
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_GET_CAPABILITIES,
|
||||
nodes, CONTROL_TIMEOUT(),
|
||||
false, tdb_null, async_getcap_callback) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Failed to read node capabilities.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
change recovery mode on all nodes
|
||||
*/
|
||||
@ -262,7 +296,7 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
|
||||
if (rec_mode == CTDB_RECOVERY_ACTIVE) {
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE,
|
||||
nodes, CONTROL_TIMEOUT(),
|
||||
false, tdb_null) != 0) {
|
||||
false, tdb_null, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -275,7 +309,7 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMODE,
|
||||
nodes, CONTROL_TIMEOUT(),
|
||||
false, data) != 0) {
|
||||
false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to set recovery mode. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -284,7 +318,7 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
|
||||
if (rec_mode == CTDB_RECOVERY_NORMAL) {
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW,
|
||||
nodes, CONTROL_TIMEOUT(),
|
||||
false, tdb_null) != 0) {
|
||||
false, tdb_null, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to thaw nodes. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -311,7 +345,7 @@ static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map *
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_SET_RECMASTER,
|
||||
list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, data) != 0) {
|
||||
CONTROL_TIMEOUT(), false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to set recmaster. Recovery failed.\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
@ -1142,7 +1176,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB,
|
||||
list_of_active_nodes(ctdb, nodemap, tmp_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, outdata) != 0) {
|
||||
CONTROL_TIMEOUT(), false, outdata, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR,(__location__ " Failed to push recdb records to nodes for db 0x%x\n", dbid));
|
||||
talloc_free(recdata);
|
||||
talloc_free(tmp_ctx);
|
||||
@ -1198,7 +1232,7 @@ static int recover_database(struct ctdb_recoverd *rec,
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE,
|
||||
list_of_active_nodes(ctdb, nodemap, recdb, true),
|
||||
CONTROL_TIMEOUT(), false, data) != 0) {
|
||||
CONTROL_TIMEOUT(), false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to wipe database. Recovery failed.\n"));
|
||||
talloc_free(recdb);
|
||||
return -1;
|
||||
@ -1321,7 +1355,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
|
||||
list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, data) != 0) {
|
||||
CONTROL_TIMEOUT(), false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to start transactions. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
@ -1340,7 +1374,7 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
/* commit all the changes */
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
|
||||
list_of_active_nodes(ctdb, nodemap, mem_ctx, true),
|
||||
CONTROL_TIMEOUT(), false, data) != 0) {
|
||||
CONTROL_TIMEOUT(), false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to commit recovery changes. Recovery failed.\n"));
|
||||
return -1;
|
||||
}
|
||||
@ -1348,19 +1382,45 @@ static int do_recovery(struct ctdb_recoverd *rec,
|
||||
DEBUG(DEBUG_NOTICE, (__location__ " Recovery - committed databases\n"));
|
||||
|
||||
|
||||
/* update the capabilities for all nodes */
|
||||
ret = update_capabilities(ctdb, nodemap);
|
||||
if (ret!=0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to update node capabilities.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* build a new vnn map with all the currently active and
|
||||
unbanned nodes */
|
||||
generation = new_generation();
|
||||
vnnmap = talloc(mem_ctx, struct ctdb_vnn_map);
|
||||
CTDB_NO_MEMORY(ctdb, vnnmap);
|
||||
vnnmap->generation = generation;
|
||||
vnnmap->size = rec->num_active;
|
||||
vnnmap->size = 0;
|
||||
vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size);
|
||||
CTDB_NO_MEMORY(ctdb, vnnmap->map);
|
||||
for (i=j=0;i<nodemap->num;i++) {
|
||||
if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
|
||||
vnnmap->map[j++] = nodemap->nodes[i].pnn;
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
|
||||
continue;
|
||||
}
|
||||
if (!(ctdb->nodes[i]->capabilities & CTDB_CAP_LMASTER)) {
|
||||
/* this node can not be an lmaster */
|
||||
DEBUG(DEBUG_DEBUG, ("Node %d cant be a LMASTER, skipping it\n", i));
|
||||
continue;
|
||||
}
|
||||
|
||||
vnnmap->size++;
|
||||
vnnmap->map = talloc_realloc_size(vnnmap, vnnmap->map, vnnmap->size);
|
||||
CTDB_NO_MEMORY(ctdb, vnnmap->map);
|
||||
vnnmap->map[j++] = nodemap->nodes[i].pnn;
|
||||
|
||||
}
|
||||
if (vnnmap->size == 0) {
|
||||
DEBUG(DEBUG_NOTICE, ("No suitable lmasters found. Adding local node (recmaster) anyway.\n"));
|
||||
vnnmap->size++;
|
||||
vnnmap->map = talloc_realloc_size(vnnmap, vnnmap->map, vnnmap->size);
|
||||
CTDB_NO_MEMORY(ctdb, vnnmap->map);
|
||||
vnnmap->map[0] = pnn;
|
||||
}
|
||||
|
||||
/* update to the new vnnmap on all nodes */
|
||||
ret = update_vnnmap_on_all_nodes(ctdb, nodemap, pnn, vnnmap, mem_ctx);
|
||||
@ -1481,6 +1541,13 @@ static void ctdb_election_data(struct ctdb_recoverd *rec, struct election_messag
|
||||
em->num_connected++;
|
||||
}
|
||||
}
|
||||
|
||||
/* we shouldnt try to win this election if we cant be a recmaster */
|
||||
if ((ctdb->capabilities & CTDB_CAP_RECMASTER) == 0) {
|
||||
em->num_connected = 0;
|
||||
em->priority_time = timeval_current();
|
||||
}
|
||||
|
||||
talloc_free(nodemap);
|
||||
}
|
||||
|
||||
@ -1494,6 +1561,11 @@ static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message
|
||||
|
||||
ctdb_election_data(rec, &myem);
|
||||
|
||||
/* we cant win if we dont have the recmaster capability */
|
||||
if ((rec->ctdb->capabilities & CTDB_CAP_RECMASTER) == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* we cant win if we are banned */
|
||||
if (rec->node_flags & NODE_FLAGS_BANNED) {
|
||||
return false;
|
||||
@ -2017,8 +2089,15 @@ ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec)
|
||||
const char count = rec->num_connected;
|
||||
struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
|
||||
|
||||
if (rec->rec_file_fd == -1) {
|
||||
DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n"));
|
||||
return;
|
||||
}
|
||||
|
||||
if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) {
|
||||
DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n"));
|
||||
close(rec->rec_file_fd);
|
||||
rec->rec_file_fd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2038,8 +2117,8 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
|
||||
DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));
|
||||
|
||||
if (rec->rec_file_fd != -1) {
|
||||
DEBUG(DEBUG_CRIT, (__location__ " rec_lock_fd is already open. Aborting\n"));
|
||||
exit(10);
|
||||
close(rec->rec_file_fd);
|
||||
rec->rec_file_fd = -1;
|
||||
}
|
||||
|
||||
pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
|
||||
@ -2049,7 +2128,8 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
|
||||
if (rec->rec_file_fd == -1) {
|
||||
DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n",
|
||||
pnnfile, strerror(errno)));
|
||||
exit(10);
|
||||
talloc_free(pnnfile);
|
||||
return;
|
||||
}
|
||||
|
||||
set_close_on_exec(rec->rec_file_fd);
|
||||
@ -2063,12 +2143,12 @@ ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
|
||||
close(rec->rec_file_fd);
|
||||
rec->rec_file_fd = -1;
|
||||
DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile));
|
||||
exit(10);
|
||||
talloc_free(pnnfile);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
|
||||
|
||||
talloc_free(pnnfile);
|
||||
|
||||
/* we start out with 0 connected nodes */
|
||||
@ -2086,6 +2166,9 @@ static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *
|
||||
struct ctdb_context *ctdb = rec->ctdb;
|
||||
struct ctdb_node_map *nodemap = rec->nodemap;
|
||||
|
||||
/* close and reopen the pnn lock file */
|
||||
ctdb_recoverd_get_pnn_lock(rec);
|
||||
|
||||
ctdb_recoverd_write_pnn_connect_count(rec);
|
||||
|
||||
event_add_timed(rec->ctdb->ev, rec->ctdb,
|
||||
@ -2108,6 +2191,10 @@ static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *
|
||||
return;
|
||||
}
|
||||
if (ctdb->recovery_lock_fd == -1) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n"));
|
||||
close(ctdb->recovery_lock_fd);
|
||||
ctdb->recovery_lock_fd = -1;
|
||||
force_election(rec, ctdb->pnn, rec->nodemap);
|
||||
return;
|
||||
}
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
@ -2687,6 +2774,37 @@ static void ctdb_recoverd_parent(struct event_context *ev, struct fd_event *fde,
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
called regularly to verify that the recovery daemon is still running
|
||||
*/
|
||||
static void ctdb_check_recd(struct event_context *ev, struct timed_event *te,
|
||||
struct timeval yt, void *p)
|
||||
{
|
||||
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
|
||||
|
||||
/* make sure we harvest the child if signals are blocked for some
|
||||
reason
|
||||
*/
|
||||
waitpid(ctdb->recoverd_pid, 0, WNOHANG);
|
||||
|
||||
if (kill(ctdb->recoverd_pid, 0) != 0) {
|
||||
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Shutting down main daemon\n", (int)ctdb->recoverd_pid));
|
||||
|
||||
ctdb_stop_recoverd(ctdb);
|
||||
ctdb_stop_keepalive(ctdb);
|
||||
ctdb_stop_monitoring(ctdb);
|
||||
ctdb_release_all_ips(ctdb);
|
||||
ctdb->methods->shutdown(ctdb);
|
||||
ctdb_event_script(ctdb, "shutdown");
|
||||
|
||||
exit(10);
|
||||
}
|
||||
|
||||
event_add_timed(ctdb->ev, ctdb,
|
||||
timeval_current_ofs(30, 0),
|
||||
ctdb_check_recd, ctdb);
|
||||
}
|
||||
|
||||
/*
|
||||
startup the recovery daemon as a child of the main ctdb daemon
|
||||
*/
|
||||
@ -2708,6 +2826,9 @@ int ctdb_start_recoverd(struct ctdb_context *ctdb)
|
||||
|
||||
if (ctdb->recoverd_pid != 0) {
|
||||
close(fd[0]);
|
||||
event_add_timed(ctdb->ev, ctdb,
|
||||
timeval_current_ofs(30, 0),
|
||||
ctdb_check_recd, ctdb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,8 @@ static struct {
|
||||
int no_setsched;
|
||||
int use_syslog;
|
||||
int start_as_disabled;
|
||||
int no_lmaster;
|
||||
int no_recmaster;
|
||||
} options = {
|
||||
.nlist = ETCDIR "/ctdb/nodes",
|
||||
.transport = "tcp",
|
||||
@ -120,6 +122,8 @@ int main(int argc, const char *argv[])
|
||||
{ "nosetsched", 0, POPT_ARG_NONE, &options.no_setsched, 0, "disable setscheduler SCHED_FIFO call", NULL },
|
||||
{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
|
||||
{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
|
||||
{ "no-lmaster", 0, POPT_ARG_NONE, &options.no_lmaster, 0, "disable lmaster role on this node", NULL },
|
||||
{ "no-recmaster", 0, POPT_ARG_NONE, &options.no_recmaster, 0, "disable recmaster role on this node", NULL },
|
||||
POPT_TABLEEND
|
||||
};
|
||||
int opt, ret;
|
||||
@ -200,6 +204,15 @@ int main(int argc, const char *argv[])
|
||||
}
|
||||
}
|
||||
|
||||
/* set ctdbd capabilities */
|
||||
ctdb->capabilities = 0;
|
||||
if (options.no_lmaster == 0) {
|
||||
ctdb->capabilities |= CTDB_CAP_LMASTER;
|
||||
}
|
||||
if (options.no_recmaster == 0) {
|
||||
ctdb->capabilities |= CTDB_CAP_RECMASTER;
|
||||
}
|
||||
|
||||
/* tell ctdb what nodes are available */
|
||||
ctdb_load_nodes_file(ctdb);
|
||||
|
||||
|
@ -217,7 +217,7 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde,
|
||||
nodeid = ctdb_ip_to_nodeid(ctdb, incoming_node);
|
||||
|
||||
if (nodeid == -1) {
|
||||
DEBUG(DEBUG_WARNING, ("Refused connection from unknown node %s\n", incoming_node));
|
||||
DEBUG(DEBUG_ERR, ("Refused connection from unknown node %s\n", incoming_node));
|
||||
close(fd);
|
||||
return;
|
||||
}
|
||||
|
@ -417,6 +417,40 @@ static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char *
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* send a release ip to all nodes */
|
||||
static int control_send_release(struct ctdb_context *ctdb, uint32_t pnn,
|
||||
struct sockaddr_in *sin)
|
||||
{
|
||||
int ret;
|
||||
struct ctdb_public_ip pip;
|
||||
TDB_DATA data;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* send a moveip message to the recovery master */
|
||||
pip.pnn = pnn;
|
||||
pip.sin.sin_family = AF_INET;
|
||||
pip.sin.sin_addr = sin->sin_addr;
|
||||
data.dsize = sizeof(pip);
|
||||
data.dptr = (unsigned char *)&pip;
|
||||
|
||||
|
||||
/* send release ip to all nodes */
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP,
|
||||
list_of_active_nodes(ctdb, nodemap, ctdb, true),
|
||||
TIMELIMIT(), false, data, NULL) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to send 'ReleaseIP' to all nodes.\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
move/failover an ip address to a specific node
|
||||
*/
|
||||
@ -426,9 +460,6 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
|
||||
struct sockaddr_in ip;
|
||||
uint32_t value;
|
||||
struct ctdb_all_public_ips *ips;
|
||||
struct ctdb_public_ip pip;
|
||||
TDB_DATA data;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
int i, ret;
|
||||
|
||||
if (argc < 2) {
|
||||
@ -442,12 +473,6 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
|
||||
}
|
||||
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, &nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (sscanf(argv[1], "%u", &pnn) != 1) {
|
||||
DEBUG(DEBUG_ERR, ("Badly formed pnn\n"));
|
||||
return -1;
|
||||
@ -496,47 +521,175 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* send a moveip message to the recovery master */
|
||||
pip.pnn = pnn;
|
||||
pip.sin.sin_family = AF_INET;
|
||||
pip.sin.sin_addr = ips->ips[i].sin.sin_addr;
|
||||
data.dsize = sizeof(pip);
|
||||
data.dptr = (unsigned char *)&pip;
|
||||
|
||||
|
||||
/* send release ip to all nodes */
|
||||
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RELEASE_IP,
|
||||
list_of_active_nodes(ctdb, nodemap, ctdb, true),
|
||||
TIMELIMIT(), false, data) != 0) {
|
||||
DEBUG(DEBUG_ERR, (__location__ " Unable to send 'ReleaseIP' to all nodes.\n"));
|
||||
ret = control_send_release(ctdb, pnn, &ips->ips[i].sin);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  one public ip address together with the pnn recorded for it.
  instances are stored in a tree keyed on the ipv4 address while the
  cluster wide list of public addresses is being assembled.
 */
struct node_ip {
	uint32_t pnn;           /* pnn copied from the public ip entry */
	struct sockaddr_in sin; /* the public address itself */
};
|
||||
|
||||
void getips_store_callback(void *param, void *data)
|
||||
{
|
||||
struct node_ip *node_ip = (struct node_ip *)data;
|
||||
struct ctdb_all_public_ips *ips = param;
|
||||
int i;
|
||||
|
||||
i = ips->num++;
|
||||
ips->ips[i].pnn = node_ip->pnn;
|
||||
ips->ips[i].sin = node_ip->sin;
|
||||
}
|
||||
|
||||
/*
  traversal callback that counts entries.

  param points at a uint32_t counter which is bumped by one on every
  invocation. data (the visited entry) is not looked at.
 */
void getips_count_callback(void *param, void *data)
{
	uint32_t *counter = param;

	*counter += 1;
}
|
||||
|
||||
static int
|
||||
control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struct ctdb_all_public_ips **ips)
|
||||
{
|
||||
struct ctdb_all_public_ips *tmp_ips;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
trbt_tree_t *tree;
|
||||
int i, j, len, ret;
|
||||
uint32_t count;
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
|
||||
return ret;
|
||||
}
|
||||
|
||||
tree = trbt_create(tmp_ctx, 0);
|
||||
|
||||
for(i=0;i<nodemap->num;i++){
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* read the public ip list from this node */
|
||||
ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), nodemap->nodes[i].pnn, tmp_ctx, &tmp_ips);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get public ip list from node %u\n", nodemap->nodes[i].pnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (j=0; j<tmp_ips->num;j++) {
|
||||
struct node_ip *node_ip;
|
||||
|
||||
node_ip = talloc(tmp_ctx, struct node_ip);
|
||||
node_ip->pnn = tmp_ips->ips[j].pnn;
|
||||
node_ip->sin = tmp_ips->ips[j].sin;
|
||||
|
||||
trbt_insert32(tree, tmp_ips->ips[j].sin.sin_addr.s_addr, node_ip);
|
||||
}
|
||||
talloc_free(tmp_ips);
|
||||
}
|
||||
|
||||
/* traverse */
|
||||
count = 0;
|
||||
trbt_traversearray32(tree, 1, getips_count_callback, &count);
|
||||
|
||||
len = offsetof(struct ctdb_all_public_ips, ips) +
|
||||
count*sizeof(struct ctdb_public_ip);
|
||||
tmp_ips = talloc_zero_size(tmp_ctx, len);
|
||||
trbt_traversearray32(tree, 1, getips_store_callback, tmp_ips);
|
||||
|
||||
*ips = tmp_ips;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* scans all other nodes and returns a pnn for another node that can host this
|
||||
* ip address or -1
|
||||
*/
|
||||
static int
|
||||
find_other_host_for_public_ip(struct ctdb_context *ctdb, struct sockaddr_in *addr)
|
||||
{
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
struct ctdb_all_public_ips *ips;
|
||||
struct ctdb_node_map *nodemap=NULL;
|
||||
int i, j, ret;
|
||||
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for(i=0;i<nodemap->num;i++){
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
if (nodemap->nodes[i].pnn == options.pnn) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* read the public ip list from this node */
|
||||
ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), nodemap->nodes[i].pnn, tmp_ctx, &ips);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get public ip list from node %u\n", nodemap->nodes[i].pnn));
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (j=0;j<ips->num;j++) {
|
||||
if (ctdb_same_ip(addr, &ips->ips[j].sin)) {
|
||||
talloc_free(tmp_ctx);
|
||||
return nodemap->nodes[i].pnn;
|
||||
}
|
||||
}
|
||||
talloc_free(ips);
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
add a public ip address to a node
|
||||
*/
|
||||
static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
int i, ret;
|
||||
int len;
|
||||
unsigned mask;
|
||||
struct sockaddr_in addr;
|
||||
struct ctdb_control_ip_iface *pub;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
struct ctdb_all_public_ips *ips;
|
||||
|
||||
if (argc != 2) {
|
||||
talloc_free(tmp_ctx);
|
||||
usage();
|
||||
}
|
||||
|
||||
if (!parse_ip_mask(argv[0], &addr, &mask)) {
|
||||
DEBUG(DEBUG_ERR, ("Badly formed ip/mask : %s\n", argv[0]));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = control_get_all_public_ips(ctdb, tmp_ctx, &ips);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get public ip list from cluster\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
len = offsetof(struct ctdb_control_ip_iface, iface) + strlen(argv[1]) + 1;
|
||||
pub = talloc_size(ctdb, len);
|
||||
pub = talloc_size(tmp_ctx, len);
|
||||
CTDB_NO_MEMORY(ctdb, pub);
|
||||
|
||||
pub->sin = addr;
|
||||
@ -547,9 +700,32 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
ret = ctdb_ctrl_add_public_ip(ctdb, TIMELIMIT(), options.pnn, pub);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to add public ip to node %u\n", options.pnn));
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* check if some other node is already serving this ip, if not,
|
||||
* we will claim it
|
||||
*/
|
||||
for (i=0;i<ips->num;i++) {
|
||||
if (ctdb_same_ip(&addr, &ips->ips[i].sin)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* no one has this ip so we claim it */
|
||||
if (i == ips->num) {
|
||||
ret = control_send_release(ctdb, options.pnn, &addr);
|
||||
} else {
|
||||
ret = control_send_release(ctdb, ips->ips[i].pnn, &addr);
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -558,11 +734,14 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
*/
|
||||
static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
int ret;
|
||||
int i, ret;
|
||||
struct sockaddr_in addr;
|
||||
struct ctdb_control_ip_iface pub;
|
||||
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
|
||||
struct ctdb_all_public_ips *ips;
|
||||
|
||||
if (argc != 1) {
|
||||
talloc_free(tmp_ctx);
|
||||
usage();
|
||||
}
|
||||
|
||||
@ -576,12 +755,44 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
pub.mask = 0;
|
||||
pub.len = 0;
|
||||
|
||||
ret = ctdb_ctrl_get_public_ips(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &ips);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get public ip list from cluster\n"));
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i=0;i<ips->num;i++) {
|
||||
if (ctdb_same_ip(&addr, &ips->ips[i].sin)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i==ips->num) {
|
||||
DEBUG(DEBUG_ERR, ("This node does not support this public address '%s'\n",
|
||||
inet_ntoa(addr.sin_addr)));
|
||||
talloc_free(tmp_ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ips->ips[i].pnn == options.pnn) {
|
||||
ret = find_other_host_for_public_ip(ctdb, &addr);
|
||||
if (ret != -1) {
|
||||
ret = control_send_release(ctdb, ret, &addr);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Failed to migrate this ip to another node. Use moveip of recover to reassign this address to a node\n"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = ctdb_ctrl_del_public_ip(ctdb, TIMELIMIT(), options.pnn, &pub);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to del public ip from node %u\n", options.pnn));
|
||||
talloc_free(tmp_ctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
talloc_free(tmp_ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1069,6 +1280,32 @@ static int control_getmonmode(struct ctdb_context *ctdb, int argc, const char **
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
display capabilities of a remote node
|
||||
*/
|
||||
static int control_getcapabilities(struct ctdb_context *ctdb, int argc, const char **argv)
|
||||
{
|
||||
uint32_t capabilities;
|
||||
int ret;
|
||||
|
||||
ret = ctdb_ctrl_getcapabilities(ctdb, TIMELIMIT(), options.pnn, &capabilities);
|
||||
if (ret != 0) {
|
||||
DEBUG(DEBUG_ERR, ("Unable to get capabilities from node %u\n", options.pnn));
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!options.machinereadable){
|
||||
printf("RECMASTER: %s\n", (capabilities&CTDB_CAP_RECMASTER)?"YES":"NO");
|
||||
printf("LMASTER: %s\n", (capabilities&CTDB_CAP_LMASTER)?"YES":"NO");
|
||||
} else {
|
||||
printf(":RECMASTER:LMASTER:\n");
|
||||
printf(":%d:%d:\n",
|
||||
!!(capabilities&CTDB_CAP_RECMASTER),
|
||||
!!(capabilities&CTDB_CAP_LMASTER));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
disable monitoring on a node
|
||||
*/
|
||||
@ -1675,6 +1912,7 @@ static const struct {
|
||||
{ "getdbmap", control_getdbmap, true, "show the database map" },
|
||||
{ "catdb", control_catdb, true, "dump a database" , "<dbname>"},
|
||||
{ "getmonmode", control_getmonmode, true, "show monitoring mode" },
|
||||
{ "getcapabilities", control_getcapabilities, true, "show node capabilities" },
|
||||
{ "disablemonitor", control_disable_monmode, true, "set monitoring mode to DISABLE" },
|
||||
{ "enablemonitor", control_enable_monmode, true, "set monitoring mode to ACTIVE" },
|
||||
{ "setdebug", control_setdebug, true, "set debug level", "<EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG>" },
|
||||
@ -1707,7 +1945,7 @@ static const struct {
|
||||
{ "reloadnodes", control_reload_nodes_file, false, "reload the nodes file and restart the transport on all nodes"},
|
||||
{ "getreclock", control_getreclock, false, "get the path to the reclock file" },
|
||||
{ "moveip", control_moveip, false, "move/failover an ip address to another node", "<ip> <node>"},
|
||||
{ "addip", control_addip, false, "add a ip address to a node", "<ip/mask> <iface>"},
|
||||
{ "addip", control_addip, true, "add a ip address to a node", "<ip/mask> <iface>"},
|
||||
{ "delip", control_delip, false, "delete an ip address from a node", "<ip>"},
|
||||
{ "eventscript", control_eventscript, true, "run the eventscript with the given parameters on a node", "<arguments>"},
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user