1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-04 17:47:26 +03:00

merge from ronnie

(This used to be ctdb commit ab11fd70cf4d2165a5b55930cbad6fddf5397f54)
This commit is contained in:
Andrew Tridgell 2007-08-27 18:04:53 +10:00
commit 8c94d4dc87
17 changed files with 1154 additions and 220 deletions

View File

@ -48,6 +48,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_tunables.o server/ctdb_monitor.o server/ctdb_server.o \
server/ctdb_control.o server/ctdb_call.o server/ctdb_ltdb_server.o \
server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
server/ctdb_serverids.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/rb_test \

View File

@ -301,6 +301,10 @@ struct ctdb_record_handle {
*/
int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
{
if (state == NULL) {
return -1;
}
while (state->state < CTDB_CALL_DONE) {
event_loop_once(state->ctdb_db->ctdb->ev);
}
@ -661,14 +665,28 @@ int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
}
struct ctdb_client_control_state {
struct ctdb_context *ctdb;
uint32_t reqid;
int32_t status;
TDB_DATA outdata;
enum call_state state;
char *errormsg;
};
/*
  timed-event handler scheduled once a control has completed or timed
  out. It hands the state to ctdb_control_recv(), discarding every
  output value; the user supplied callback is reached through that call.
*/
static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
				    struct timeval t, void *private_data)
{
	struct ctdb_client_control_state *ctl =
		talloc_get_type(private_data, struct ctdb_client_control_state);
	TALLOC_CTX *scratch = talloc_new(NULL);
	int ret;

	/* reparent the state onto the scratch context while it is received */
	talloc_steal(scratch, ctl);

	/* no outdata, status or error message is wanted here */
	ret = ctdb_control_recv(ctl->ctdb, ctl, ctl, NULL, NULL, NULL);

	talloc_free(scratch);
}
/*
called when a CTDB_REPLY_CONTROL packet comes in in the client
@ -703,21 +721,22 @@ static void ctdb_client_reply_control(struct ctdb_context *ctdb,
c->errorlen);
}
/* state->outdata now uses resources from c so we dont want c
to just dissappear from under us while state is still alive
*/
talloc_steal(state, c);
state->state = CTDB_CALL_DONE;
state->state = CTDB_CONTROL_DONE;
/* if we had a callback registered for this control, pull the response
and call the callback.
*/
if (state->async.fn) {
event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
}
}
/* time out handler for ctdb_control: marks the caller's uint32_t flag
   (passed as private_data) as expired by storing 1 in it */
static void timeout_func(struct event_context *ev, struct timed_event *te,
			 struct timeval t, void *private_data)
{
	uint32_t *expired = private_data;

	*expired = 1;
}
/*
destroy a ctdb_control in client
*/
@ -727,6 +746,152 @@ static int ctdb_control_destructor(struct ctdb_client_control_state *state)
return 0;
}
/* timed-event handler fired when a control's timeout expires: logs the
   request, marks the state CTDB_CONTROL_TIMEOUT and, if a completion
   callback is registered, schedules invoke_control_callback */
static void control_timeout_func(struct event_context *ev, struct timed_event *te,
				 struct timeval t, void *private_data)
{
	struct ctdb_client_control_state *ctl;

	ctl = talloc_get_type(private_data, struct ctdb_client_control_state);

	DEBUG(0,("control timed out. reqid:%d opcode:%d dstnode:%d\n", ctl->reqid, ctl->c->opcode, ctl->c->hdr.destnode));

	ctl->state = CTDB_CONTROL_TIMEOUT;

	/* nothing further to do unless a completion callback was registered */
	if (!ctl->async.fn) {
		return;
	}

	/* run the callback from the event loop rather than from inside
	   this handler */
	event_add_timed(ctl->ctdb->ev, ctl, timeval_zero(), invoke_control_callback, ctl);
}
/*
  async version of send control request

  Builds a CTDB_REQ_CONTROL packet carrying 'data' and queues it for
  'destnode'.

  ctdb     - client context; its daemon socket is opened first if needed
  destnode - node the control is addressed to
  srvid, opcode, flags - copied verbatim into the request packet
  data     - payload copied into the packet (may be empty)
  mem_ctx  - talloc parent of the returned state
  outdata  - not used by the send half
  timeout  - if non-NULL and non-zero, a timer is armed that moves the
             state to CTDB_CONTROL_TIMEOUT
  errormsg - if non-NULL, *errormsg is reset to NULL

  Returns the request state to hand to ctdb_control_recv(). Returns NULL
  on allocation or queueing failure, and also when
  CTDB_CTRL_FLAG_NOREPLY is set (the packet has been queued, but there
  is no reply to wait for).
*/
struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
		uint32_t destnode, uint64_t srvid,
		uint32_t opcode, uint32_t flags, TDB_DATA data,
		TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
		struct timeval *timeout,
		char **errormsg)
{
	struct ctdb_client_control_state *state;
	size_t len;
	struct ctdb_req_control *c;
	int ret;

	if (errormsg) {
		*errormsg = NULL;
	}

	/* if the domain socket is not yet open, open it */
	if (ctdb->daemon.sd==-1) {
		ctdb_socket_connect(ctdb);
	}

	state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
	CTDB_NO_MEMORY_NULL(ctdb, state);

	state->ctdb     = ctdb;
	state->reqid    = ctdb_reqid_new(ctdb, state);
	state->state    = CTDB_CONTROL_WAIT;
	state->errormsg = NULL;

	talloc_set_destructor(state, ctdb_control_destructor);

	len = offsetof(struct ctdb_req_control, data) + data.dsize;
	c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
			       len, struct ctdb_req_control);
	if (c == NULL) {
		/* release the half-built state (and, through its destructor,
		   whatever was registered for it) instead of leaving it
		   behind; with a NULL mem_ctx nobody else would free it */
		talloc_free(state);
		/* presumably records the out-of-memory error and returns NULL,
		   as it does for 'state' above - the explicit return keeps
		   this path safe either way */
		CTDB_NO_MEMORY_NULL(ctdb, c);
		return NULL;
	}
	state->c = c;

	c->hdr.reqid    = state->reqid;
	c->hdr.destnode = destnode;
	c->opcode       = opcode;
	c->client_id    = 0;
	c->flags        = flags;
	c->srvid        = srvid;
	c->datalen      = data.dsize;
	if (data.dsize) {
		memcpy(&c->data[0], data.dptr, data.dsize);
	}

	/* timeout: the timer is a talloc child of state, so it goes away
	   together with the request */
	if (timeout && !timeval_is_zero(timeout)) {
		event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
	}

	ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
	if (ret != 0) {
		talloc_free(state);
		return NULL;
	}

	/* fire-and-forget controls have no reply to wait for */
	if (flags & CTDB_CTRL_FLAG_NOREPLY) {
		talloc_free(state);
		return NULL;
	}

	return state;
}
/*
  async version of receive control reply

  Runs the event loop until 'state' leaves CTDB_CONTROL_WAIT, then copies
  the results out. A NULL state is rejected with -1.

  mem_ctx  - talloc parent for the copied outdata / moved error message
  outdata  - optional; receives a talloc_memdup'ed copy of the reply data
  status   - optional; receives the status stored in the state
  errormsg - optional; receives the error message stored in the state

  Returns 0 on success. Returns -1 if the control did not finish in state
  CTDB_CONTROL_DONE or if an error message was stored in the state.
  For any non-NULL state the registered async callback (if any) is
  invoked and the state is freed before returning, so the caller must not
  use 'state' afterwards.
*/
int ctdb_control_recv(struct ctdb_context *ctdb,
		      struct ctdb_client_control_state *state,
		      TALLOC_CTX *mem_ctx,
		      TDB_DATA *outdata, int32_t *status, char **errormsg)
{
	int result = -1;

	if (state == NULL) {
		return -1;
	}

	/* process one event at a time until the control either completes
	   or times out */
	while (state->state == CTDB_CONTROL_WAIT) {
		event_loop_once(ctdb->ev);
	}

	if (state->state != CTDB_CONTROL_DONE) {
		DEBUG(0,(__location__ " ctdb_control_recv failed\n"));
		goto finish;
	}

	if (state->errormsg) {
		DEBUG(0,("ctdb_control error: '%s'\n", state->errormsg));
		if (errormsg) {
			(*errormsg) = talloc_move(mem_ctx, &state->errormsg);
		}
		goto finish;
	}

	if (outdata) {
		/* copy the reply data, since the state is freed below */
		*outdata = state->outdata;
		outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
	}
	if (status) {
		*status = state->status;
	}
	result = 0;

finish:
	/* common exit: notify the async user, then drop the state */
	if (state->async.fn) {
		state->async.fn(state);
	}
	talloc_free(state);
	return result;
}
/*
send a ctdb control message
timeout specifies how long we should wait for a reply.
@ -739,99 +904,17 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
char **errormsg)
{
struct ctdb_client_control_state *state;
struct ctdb_req_control *c;
size_t len;
int ret;
uint32_t timed_out;
if (errormsg) {
*errormsg = NULL;
}
/* if the domain socket is not yet open, open it */
if (ctdb->daemon.sd==-1) {
ctdb_socket_connect(ctdb);
}
state = talloc_zero(ctdb, struct ctdb_client_control_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->reqid = ctdb_reqid_new(ctdb, state);
state->state = CTDB_CALL_WAIT;
state->errormsg = NULL;
talloc_set_destructor(state, ctdb_control_destructor);
len = offsetof(struct ctdb_req_control, data) + data.dsize;
c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
len, struct ctdb_req_control);
CTDB_NO_MEMORY(ctdb, c);
c->hdr.reqid = state->reqid;
c->hdr.destnode = destnode;
c->hdr.reqid = state->reqid;
c->opcode = opcode;
c->client_id = 0;
c->flags = flags;
c->srvid = srvid;
c->datalen = data.dsize;
if (data.dsize) {
memcpy(&c->data[0], data.dptr, data.dsize);
}
ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
if (ret != 0) {
talloc_free(state);
return -1;
}
if (flags & CTDB_CTRL_FLAG_NOREPLY) {
talloc_free(state);
return 0;
}
/* semi-async operation */
timed_out = 0;
if (timeout && !timeval_is_zero(timeout)) {
event_add_timed(ctdb->ev, state, *timeout, timeout_func, &timed_out);
}
while ((state->state == CTDB_CALL_WAIT)
&& (timed_out == 0) ){
event_loop_once(ctdb->ev);
}
if (timed_out) {
talloc_free(state);
if (errormsg) {
(*errormsg) = talloc_strdup(mem_ctx, "control timed out");
} else {
DEBUG(0,("ctdb_control timed out\n"));
}
return -1;
}
if (outdata) {
*outdata = state->outdata;
outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
}
*status = state->status;
if (!errormsg && state->errormsg) {
DEBUG(0,("ctdb_control error: '%s'\n", state->errormsg));
}
if (errormsg && state->errormsg) {
(*errormsg) = talloc_move(mem_ctx, &state->errormsg);
}
talloc_free(state);
return 0;
state = ctdb_control_send(ctdb, destnode, srvid, opcode,
flags, data, mem_ctx, outdata,
timeout, errormsg);
return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
errormsg);
}
/*
a process exists call. Returns 0 if process exists, -1 otherwise
*/
@ -889,13 +972,12 @@ int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ct
*/
int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
int ret;
int32_t res;
struct ctdb_client_control_state *state;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_SHUTDOWN, CTDB_CTRL_FLAG_NOREPLY, tdb_null,
NULL, NULL, &res, &timeout, NULL);
if (ret != 0) {
state = ctdb_control_send(ctdb, destnode, 0,
CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
NULL, NULL, &timeout, NULL);
if (state == NULL) {
DEBUG(0,(__location__ " ctdb_control for shutdown failed\n"));
return -1;
}
@ -941,27 +1023,47 @@ int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint3
return 0;
}
/*
get the recovery mode of a remote node
*/
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
/* async send half of "get recovery mode": queues a
   CTDB_CONTROL_GET_RECMODE control with no payload to destnode and
   returns the request state (NULL if the send failed) */
struct ctdb_client_control_state *
ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
{
	struct ctdb_client_control_state *handle;

	handle = ctdb_control_send(ctdb, destnode, 0,
				   CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
				   mem_ctx, NULL, &timeout, NULL);
	return handle;
}
int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
{
int ret;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
NULL, NULL, &res, &timeout, NULL);
ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
if (ret != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmode failed\n"));
DEBUG(0,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
return -1;
}
*recmode = res;
if (recmode) {
*recmode = (uint32_t)res;
}
return 0;
}
/* synchronous "get recovery mode": the send half immediately followed
   by the matching recv half; returns whatever the recv half returns */
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
{
	return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx,
			ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode),
			recmode);
}
/*
set the recovery mode of a remote node
*/
@ -985,27 +1087,47 @@ int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint
return 0;
}
/*
get the recovery master of a remote node
*/
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
/* async send half of "get recovery master": queues a
   CTDB_CONTROL_GET_RECMASTER control with no payload to destnode and
   returns the request state (NULL if the send failed) */
struct ctdb_client_control_state *
ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
			    struct timeval timeout, uint32_t destnode)
{
	struct ctdb_client_control_state *handle;

	handle = ctdb_control_send(ctdb, destnode, 0,
				   CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
				   mem_ctx, NULL, &timeout, NULL);
	return handle;
}
int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
{
int ret;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
NULL, NULL, &res, &timeout, NULL);
ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
if (ret != 0) {
DEBUG(0,(__location__ " ctdb_control for getrecmaster failed\n"));
DEBUG(0,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
return -1;
}
*recmaster = res;
if (recmaster) {
*recmaster = (uint32_t)res;
}
return 0;
}
/* synchronous "get recovery master": the send half immediately followed
   by the matching recv half; returns whatever the recv half returns */
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
{
	return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx,
			ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode),
			recmaster);
}
/*
set the recovery master of a remote node
*/
@ -1684,24 +1806,49 @@ int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t
/*
freeze a node
async freeze send control
*/
int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
/* async send half of "freeze": queues a CTDB_CONTROL_FREEZE control
   with no payload to destnode and returns the request state (NULL if
   the send failed) */
struct ctdb_client_control_state *
ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
{
	struct ctdb_client_control_state *handle;

	handle = ctdb_control_send(ctdb, destnode, 0,
				   CTDB_CONTROL_FREEZE, 0, tdb_null,
				   mem_ctx, NULL, &timeout, NULL);
	return handle;
}
/*
async freeze recv control
*/
int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
{
int ret;
int32_t res;
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_FREEZE, 0, tdb_null,
NULL, NULL, &res, &timeout, NULL);
if (ret != 0 || res != 0) {
DEBUG(0,(__location__ " ctdb_control freeze failed\n"));
ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
if ( (ret != 0) || (res != 0) ){
DEBUG(0,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
return -1;
}
return 0;
}
/*
  freeze a node (synchronous)

  Runs the async send/recv pair on a temporary talloc context that is
  released before returning. Returns the result of the recv half.
*/
int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
	TALLOC_CTX *scratch = talloc_new(ctdb);
	int rc;

	rc = ctdb_ctrl_freeze_recv(ctdb, scratch,
				   ctdb_ctrl_freeze_send(ctdb, scratch, timeout, destnode));

	talloc_free(scratch);
	return rc;
}
/*
thaw a node
*/
@ -2176,6 +2323,117 @@ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
return status;
}
/*
  register a server id

  Sends CTDB_CONTROL_REGISTER_SERVER_ID to CTDB_CURRENT_NODE with the raw
  bytes of *id as payload. Returns 0 on success, -1 if the control call
  fails or reports a non-zero status.
*/
int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
				 struct timeval timeout,
				 struct ctdb_server_id *id)
{
	TDB_DATA payload;
	int32_t status;

	payload.dptr  = (unsigned char *)id;
	payload.dsize = sizeof(*id);

	/* status is only examined when the control call itself succeeded */
	if (ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
			 CTDB_CONTROL_REGISTER_SERVER_ID,
			 0, payload, NULL,
			 NULL, &status, &timeout, NULL) != 0 || status != 0) {
		DEBUG(0,(__location__ " ctdb_control for register server id failed\n"));
		return -1;
	}

	return 0;
}
/*
  unregister a server id

  Sends CTDB_CONTROL_UNREGISTER_SERVER_ID to CTDB_CURRENT_NODE with the
  raw bytes of *id as payload. Returns 0 on success, -1 if the control
  call fails or reports a non-zero status.
*/
int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
				   struct timeval timeout,
				   struct ctdb_server_id *id)
{
	TDB_DATA payload;
	int32_t status;

	payload.dptr  = (unsigned char *)id;
	payload.dsize = sizeof(*id);

	/* status is only examined when the control call itself succeeded */
	if (ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
			 CTDB_CONTROL_UNREGISTER_SERVER_ID,
			 0, payload, NULL,
			 NULL, &status, &timeout, NULL) != 0 || status != 0) {
		DEBUG(0,(__location__ " ctdb_control for unregister server id failed\n"));
		return -1;
	}

	return 0;
}
/*
  check if a server id exists

  Sends CTDB_CONTROL_CHECK_SERVER_ID to destnode with the raw bytes of
  *id as payload. On success returns 0 and stores 1 in *status when the
  control reported a non-zero result, 0 otherwise. Returns -1 (leaving
  *status untouched) if the control call itself fails.
*/
int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
			      struct timeval timeout,
			      uint32_t destnode,
			      struct ctdb_server_id *id,
			      uint32_t *status)
{
	TDB_DATA payload;
	int32_t answer;

	payload.dptr  = (unsigned char *)id;
	payload.dsize = sizeof(*id);

	if (ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
			 0, payload, NULL,
			 NULL, &answer, &timeout, NULL) != 0) {
		DEBUG(0,(__location__ " ctdb_control for check server id failed\n"));
		return -1;
	}

	/* collapse the control result to a strict 0/1 flag */
	*status = answer ? 1 : 0;

	return 0;
}
/*
  get the list of server ids that are registered on a node

  Sends CTDB_CONTROL_GET_SERVER_ID_LIST to destnode and, on success,
  hands the reply buffer to the caller as *svid_list, parented on
  mem_ctx. Returns 0 on success, -1 if the control call fails or reports
  a non-zero status (in which case *svid_list is not written).
*/
int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
				 TALLOC_CTX *mem_ctx,
				 struct timeval timeout, uint32_t destnode,
				 struct ctdb_server_id_list **svid_list)
{
	TDB_DATA reply;
	int32_t status;

	/* status is only examined when the control call itself succeeded */
	if (ctdb_control(ctdb, destnode, 0,
			 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
			 mem_ctx, &reply, &status, &timeout, NULL) != 0 || status != 0) {
		DEBUG(0,(__location__ " ctdb_control for get_server_id_list failed\n"));
		return -1;
	}

	/* the reply buffer is reinterpreted in place as the list structure */
	*svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, reply.dptr);

	return 0;
}
/*
initialise the ctdb daemon for client applications

View File

@ -23,6 +23,7 @@
#include "popt.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
#include "../common/rb_tree.h"
/* Handle common command line options for ctdb test progs
*/
@ -87,6 +88,9 @@ struct ctdb_context *ctdb_cmdline_init(struct event_context *ev)
exit(1);
}
/* set up the tree to store server ids */
ctdb->server_ids = trbt_create(ctdb, 0);
return ctdb;
}

View File

@ -1,11 +1,11 @@
.\" Title: ctdb
.\" Author:
.\" Generator: DocBook XSL Stylesheets v1.71.0 <http://docbook.sf.net/>
.\" Date: 08/03/2007
.\" Date: 08/23/2007
.\" Manual:
.\" Source:
.\"
.TH "CTDB" "1" "08/03/2007" "" ""
.TH "CTDB" "1" "08/23/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@ -31,7 +31,7 @@ The virtual node number is an integer that describes the node in the cluster. Th
.PP
\-Y
.RS 3n
Produce output in machinereadable form for easier parsing by scripts. Not all commands support this option.
Produce output in machine readable form for easier parsing by scripts. Not all commands support this option.
.RE
.PP
\-t <timeout>
@ -78,7 +78,7 @@ Node status reflects the current status of the node. There are four possible sta
.PP
OK \- This node is fully functional.
.PP
DISCONNECTED \- This node could not be connected through the network and is currently not parcipitating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
DISCONNECTED \- This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
.PP
DISABLED \- This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
.PP
@ -104,7 +104,7 @@ The generation id is a number that indicates the current generation of a cluster
\fBVNNMAP\fR
.RS
.PP
The list of Virtual Node Numbers. This is a list of all nodes that actively participates in the cluster and that share the workload of hosting the Clustered TDB database records. Only nodes that are parcipitating in the vnnmap can become lmaster or dmaster for a database record.
The list of Virtual Node Numbers. This is a list of all nodes that actively participates in the cluster and that share the workload of hosting the Clustered TDB database records. Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
.RE
.sp
.it 1 an-trap

View File

@ -8,7 +8,7 @@
The virtual node number is an integer that describes the node in the
cluster. The first node has virtual node number 0.
</p></dd><dt><span class="term">-Y</span></dt><dd><p>
Produce output in machinereadable form for easier parsing by scripts. Not all commands support this option.
Produce output in machine readable form for easier parsing by scripts. Not all commands support this option.
</p></dd><dt><span class="term">-t &lt;timeout&gt;</span></dt><dd><p>
How long should ctdb wait for a command to complete before timing out. Default is 3 seconds.
</p></dd><dt><span class="term">-? --help</span></dt><dd><p>
@ -24,36 +24,36 @@
You only need to specify this parameter if you run multiple ctdb
daemons on the same physical host and thus can not use the default
name for the domain socket.
</p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2481133"></a><h2>Administrative Commands</h2><p>
</p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2481134"></a><h2>Administrative Commands</h2><p>
These are commands used to monitor and administrate a CTDB cluster.
</p><div class="refsect2" lang="en"><a name="id2481142"></a><h3>status</h3><p>
</p><div class="refsect2" lang="en"><a name="id2481143"></a><h3>status</h3><p>
This command shows the current status of the ctdb node.
</p><div class="refsect3" lang="en"><a name="id2481151"></a><h4>node status</h4><p>
</p><div class="refsect3" lang="en"><a name="id2481152"></a><h4>node status</h4><p>
Node status reflects the current status of the node. There are four possible states:
</p><p>
OK - This node is fully functional.
</p><p>
DISCONNECTED - This node could not be connected through the network and is currently not parcipitating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
DISCONNECTED - This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
</p><p>
DISABLED - This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
</p><p>
UNHEALTHY - A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currently being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
</p><p>
BANNED - This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not participate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
</p></div><div class="refsect3" lang="en"><a name="id2481202"></a><h4>generation</h4><p>
</p></div><div class="refsect3" lang="en"><a name="id2481204"></a><h4>generation</h4><p>
The generation id is a number that indicates the current generation
of a cluster instance. Each time a cluster goes through a
reconfiguration or a recovery its generation id will be changed.
</p></div><div class="refsect3" lang="en"><a name="id2481215"></a><h4>VNNMAP</h4><p>
</p></div><div class="refsect3" lang="en"><a name="id2481216"></a><h4>VNNMAP</h4><p>
The list of Virtual Node Numbers. This is a list of all nodes that actively participates in the cluster and that share the workload of hosting the Clustered TDB database records.
Only nodes that are parcipitating in the vnnmap can become lmaster or dmaster for a database record.
</p></div><div class="refsect3" lang="en"><a name="id2481229"></a><h4>Recovery mode</h4><p>
Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
</p></div><div class="refsect3" lang="en"><a name="id2481230"></a><h4>Recovery mode</h4><p>
This is the current recovery mode of the cluster. There are two possible modes:
</p><p>
NORMAL - The cluster is fully operational.
</p><p>
RECOVERY - The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
</p></div><div class="refsect3" lang="en"><a name="id2481253"></a><h4>Recovery master</h4><p>
</p></div><div class="refsect3" lang="en"><a name="id2481254"></a><h4>Recovery master</h4><p>
This is the cluster node that is currently designated as the recovery master. This node is responsible for monitoring the consistency of the cluster and for performing the actual recovery process when required.
</p></div><p>
Example: ctdb status
@ -94,7 +94,7 @@ Number of nodes:4
12.1.1.2 1
12.1.1.3 2
12.1.1.4 3
</pre></div><div class="refsect2" lang="en"><a name="id2481335"></a><h3>getvar &lt;name&gt;</h3><p>
</pre></div><div class="refsect2" lang="en"><a name="id2481336"></a><h3>getvar &lt;name&gt;</h3><p>
Get the runtime value of a tuneable variable.
</p><p>
Example: ctdb getvar MaxRedirectCount
@ -170,7 +170,7 @@ CTDB version 1
max_hop_count 0
max_call_latency 4.948321 sec
max_lockwait_latency 0.000000 sec
</pre></div><div class="refsect2" lang="en"><a name="id2528503"></a><h3>statisticsreset</h3><p>
</pre></div><div class="refsect2" lang="en"><a name="id2528504"></a><h3>statisticsreset</h3><p>
This command is used to clear all statistics counters in a node.
</p><p>
Example: ctdb statisticsreset
@ -178,14 +178,14 @@ CTDB version 1
Get the current debug level for the node. the debug level controls what information is written to the log file.
</p></div><div class="refsect2" lang="en"><a name="id2528529"></a><h3>setdebug &lt;debuglevel&gt;</h3><p>
Set the debug level of a node. This is a number between 0 and 9 and controls what information will be written to the logfile.
</p></div><div class="refsect2" lang="en"><a name="id2528540"></a><h3>getpid</h3><p>
</p></div><div class="refsect2" lang="en"><a name="id2528541"></a><h3>getpid</h3><p>
This command will return the process id of the ctdb daemon.
</p></div><div class="refsect2" lang="en"><a name="id2528551"></a><h3>disable</h3><p>
This command is used to administratively disable a node in the cluster.
A disabled node will still participate in the cluster and host
clustered TDB records but its public ip address has been taken over by
a different node and it no longer hosts any services.
</p></div><div class="refsect2" lang="en"><a name="id2528564"></a><h3>enable</h3><p>
</p></div><div class="refsect2" lang="en"><a name="id2528565"></a><h3>enable</h3><p>
Re-enable a node that has been administratively disabled.
</p></div><div class="refsect2" lang="en"><a name="id2528575"></a><h3>ban &lt;bantime|0&gt;</h3><p>
Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned.
@ -221,7 +221,7 @@ CTDB version 1
</p></div></div><div class="refsect1" lang="en"><a name="id2528668"></a><h2>Debugging Commands</h2><p>
These commands are primarily used for CTDB development and testing and
should not be used for normal administration.
</p><div class="refsect2" lang="en"><a name="id2528678"></a><h3>process-exists &lt;pid&gt;</h3><p>
</p><div class="refsect2" lang="en"><a name="id2528679"></a><h3>process-exists &lt;pid&gt;</h3><p>
This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
</p></div><div class="refsect2" lang="en"><a name="id2528691"></a><h3>getdbmap</h3><p>
This command lists all clustered TDB databases that the CTDB daemon has attached to.

View File

@ -1,11 +1,11 @@
.\" Title: ctdbd
.\" Author:
.\" Generator: DocBook XSL Stylesheets v1.71.0 <http://docbook.sf.net/>
.\" Date: 07/10/2007
.\" Date: 08/23/2007
.\" Manual:
.\" Source:
.\"
.TH "CTDBD" "1" "07/10/2007" "" ""
.TH "CTDBD" "1" "08/23/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@ -179,7 +179,7 @@ There are five possible for a node.
.PP
OK \- This node is fully functional.
.PP
DISCONNECTED \- This node could not be connected through the network and is currently not parcipitating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
DISCONNECTED \- This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
.PP
DISABLED \- This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
.PP

View File

@ -138,7 +138,7 @@
OK - This node is fully functional.
</p><p>
DISCONNECTED - This node could not be connected through the network
and is currently not parcipitating in the cluster. If there is a
and is currently not participating in the cluster. If there is a
public IP address associated with this node it should have been taken
over by a different node. No services are running on this node.
</p><p>

View File

@ -96,6 +96,28 @@ struct ctdb_call_info {
#define CTDB_BROADCAST_CONNECTED 0xF0000004
/* progress of a client-side control request, as stored in the 'state'
   field of struct ctdb_client_control_state */
enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT};
/* client-side bookkeeping for one outstanding control request */
struct ctdb_client_control_state {
/* client context the control was issued on */
struct ctdb_context *ctdb;
/* request id of this control */
uint32_t reqid;
/* status value of the reply */
int32_t status;
/* data returned with the reply */
TDB_DATA outdata;
/* current progress of the request (see enum control_state) */
enum control_state state;
/* error message attached to the reply, or NULL */
char *errormsg;
/* the request packet */
struct ctdb_req_control *c;
/* optional callback invoked on completion (or failure) of this
control; fn is NULL when no callback is registered.
If a callback is used, it MUST talloc_free the cb_data passed to it.
*/
struct {
void (*fn)(struct ctdb_client_control_state *);
/* NOTE(review): presumably opaque user data for fn - confirm with callers */
void *private;
} async;
};
struct event_context;
/*
@ -302,7 +324,13 @@ int ctdb_ctrl_write_record(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_
/*
get the recovery mode of a remote node
*/
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode);
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode);
struct ctdb_client_control_state *ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode);
/*
set the recovery mode of a remote node
*/
@ -319,7 +347,14 @@ int ctdb_ctrl_setmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint
/*
get the recovery master of a remote node
*/
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
struct ctdb_client_control_state *ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster);
/*
set the recovery master of a remote node
*/
@ -344,7 +379,16 @@ int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f);
*/
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid);
int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout,
uint32_t destnode);
struct ctdb_client_control_state *
ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
struct ctdb_client_control_state *state);
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_getvnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
@ -370,6 +414,34 @@ int ctdb_ctrl_modflags(struct ctdb_context *ctdb,
uint32_t destnode,
uint32_t set, uint32_t clear);
enum ctdb_server_id_type { SERVER_TYPE_SAMBA=1 };
/* identifies one server registration; sent as the raw payload of the
   register/unregister/check server id controls */
struct ctdb_server_id {
/* kind of server (see enum ctdb_server_id_type) */
enum ctdb_server_id_type type;
/* NOTE(review): presumably the virtual node number the server runs on - confirm */
uint32_t vnn;
uint32_t server_id;
};
/* result layout of ctdb_ctrl_get_server_id_list() */
struct ctdb_server_id_list {
/* NOTE(review): presumably the number of entries in server_ids - confirm */
uint32_t num;
/* NOTE(review): declared with one element; looks like a variable-length
   trailing array of 'num' entries - confirm against the server side */
struct ctdb_server_id server_ids[1];
};
int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
struct timeval timeout,
struct ctdb_server_id *id);
int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
struct timeval timeout,
struct ctdb_server_id *id);
int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
struct ctdb_server_id *id, uint32_t *status);
int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx,
struct timeval timeout, uint32_t destnode,
struct ctdb_server_id_list **svid_list);
int ctdb_socket_connect(struct ctdb_context *ctdb);
#endif

View File

@ -349,6 +349,7 @@ struct ctdb_context {
bool do_setsched;
void *saved_scheduler_param;
struct ctdb_kill_tcp *killtcp;
struct _trbt_tree_t *server_ids;
};
struct ctdb_db_context {
@ -451,6 +452,10 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_KILL_TCP = 54,
CTDB_CONTROL_GET_TCP_TICKLE_LIST = 55,
CTDB_CONTROL_SET_TCP_TICKLE_LIST = 56,
CTDB_CONTROL_REGISTER_SERVER_ID = 57,
CTDB_CONTROL_UNREGISTER_SERVER_ID = 58,
CTDB_CONTROL_CHECK_SERVER_ID = 59,
CTDB_CONTROL_GET_SERVER_ID_LIST = 60,
};
/*
@ -850,6 +855,18 @@ int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
uint32_t opcode, uint32_t flags, TDB_DATA data,
TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
struct timeval *timeout, char **errormsg);
int ctdb_control_recv(struct ctdb_context *ctdb,
struct ctdb_client_control_state *state,
TALLOC_CTX *mem_ctx,
TDB_DATA *outdata, int32_t *status, char **errormsg);
struct ctdb_client_control_state *
ctdb_control_send(struct ctdb_context *ctdb,
uint32_t destnode, uint64_t srvid,
uint32_t opcode, uint32_t flags, TDB_DATA data,
TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
struct timeval *timeout,
char **errormsg);
@ -1108,4 +1125,15 @@ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
uint32_t vnn,
struct ctdb_control_tcp_tickle_list **list);
int32_t ctdb_control_register_server_id(struct ctdb_context *ctdb,
uint32_t client_id,
TDB_DATA indata);
int32_t ctdb_control_check_server_id(struct ctdb_context *ctdb,
TDB_DATA indata);
int32_t ctdb_control_unregister_server_id(struct ctdb_context *ctdb,
TDB_DATA indata);
int32_t ctdb_control_get_server_id_list(struct ctdb_context *ctdb,
TDB_DATA *outdata);
#endif

View File

@ -26,6 +26,7 @@
#include "lib/util/dlinklist.h"
#include "db_wrap.h"
struct ctdb_control_state {
struct ctdb_context *ctdb;
uint32_t reqid;
@ -34,6 +35,7 @@ struct ctdb_control_state {
unsigned flags;
};
/*
process a control request
*/
@ -294,6 +296,22 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
/* data size is verified in the called function */
return ctdb_control_set_tcp_tickle_list(ctdb, indata);
case CTDB_CONTROL_REGISTER_SERVER_ID:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_server_id));
return ctdb_control_register_server_id(ctdb, client_id, indata);
case CTDB_CONTROL_UNREGISTER_SERVER_ID:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_server_id));
return ctdb_control_unregister_server_id(ctdb, indata);
case CTDB_CONTROL_CHECK_SERVER_ID:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_server_id));
return ctdb_control_check_server_id(ctdb, indata);
case CTDB_CONTROL_GET_SERVER_ID_LIST:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_get_server_id_list(ctdb, outdata);
default:
DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;

View File

@ -116,6 +116,87 @@ static void ctdb_ban_node(struct ctdb_recoverd *rec, uint32_t vnn, uint32_t ban_
}
}
/*
  outcome of one of the asynchronous cluster checks
  MONITOR_OK              - every node answered as expected
  MONITOR_RECOVERY_NEEDED - a recovery has to be (re)started
  MONITOR_ELECTION_NEEDED - a new recmaster election is needed
  MONITOR_FAILED          - the check itself could not be completed
*/
enum monitor_result { MONITOR_OK, MONITOR_RECOVERY_NEEDED, MONITOR_ELECTION_NEEDED, MONITOR_FAILED};
/*
  state shared by all the freeze controls sent by freeze_all_nodes()
*/
struct freeze_node_data {
uint32_t count; /* freeze controls sent whose callback has not run yet */
enum monitor_result status; /* MONITOR_OK until one freeze fails */
};
/*
  completion callback for one async freeze control.
  accounts for the finished control and records MONITOR_RECOVERY_NEEDED
  in the shared freeze_node_data if the node was not frozen
*/
static void freeze_node_callback(struct ctdb_client_control_state *state)
{
	struct freeze_node_data *pending;

	pending = talloc_get_type(state->async.private, struct freeze_node_data);

	/* this control is no longer outstanding */
	pending->count -= 1;

	/* a completed control with a zero status is the only success case */
	if (state->state == CTDB_CONTROL_DONE && state->status == 0) {
		return;
	}

	/* anything else means the node is not frozen and we must recover again */
	DEBUG(0, (__location__ " Failed to freeze node:%u. recovery failed\n", state->c->hdr.destnode));
	pending->status = MONITOR_RECOVERY_NEEDED;
}
/* freeze all nodes */
static enum monitor_result freeze_all_nodes(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
struct freeze_node_data *fndata;
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
struct ctdb_client_control_state *state;
enum monitor_result status;
int j;
fndata = talloc(mem_ctx, struct freeze_node_data);
CTDB_NO_MEMORY_FATAL(ctdb, fndata);
fndata->count = 0;
fndata->status = MONITOR_OK;
/* loop over all active nodes and send an async freeze call to
them*/
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
state = ctdb_ctrl_freeze_send(ctdb, mem_ctx,
CONTROL_TIMEOUT(),
nodemap->nodes[j].vnn);
if (state == NULL) {
/* we failed to send the control, treat this as
an error and try again next iteration
*/
DEBUG(0,("Failed to call ctdb_ctrl_freeze_send during recovery\n"));
talloc_free(mem_ctx);
return MONITOR_RECOVERY_NEEDED;
}
/* set up the callback functions */
state->async.fn = freeze_node_callback;
state->async.private = fndata;
/* one more control to wait for to complete */
fndata->count++;
}
/* now wait for up to the maximum number of seconds allowed
or until all nodes we expect a response from has replied
*/
while (fndata->count > 0) {
event_loop_once(ctdb->ev);
}
status = fndata->status;
talloc_free(mem_ctx);
return status;
}
/*
change recovery mode on all nodes
@ -124,10 +205,15 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
{
int j, ret;
/* start the freeze process immediately on all nodes */
ctdb_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
CTDB_CONTROL_FREEZE, CTDB_CTRL_FLAG_NOREPLY, tdb_null,
NULL, NULL, NULL, NULL, NULL);
/* freeze all nodes */
if (rec_mode == CTDB_RECOVERY_ACTIVE) {
ret = freeze_all_nodes(ctdb, nodemap);
if (ret != MONITOR_OK) {
DEBUG(0, (__location__ " Unable to freeze nodes. Recovery failed.\n"));
return -1;
}
}
/* set recovery mode to active on all nodes */
for (j=0; j<nodemap->num; j++) {
@ -136,14 +222,6 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no
continue;
}
if (rec_mode == CTDB_RECOVERY_ACTIVE) {
ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to freeze node %u\n", nodemap->nodes[j].vnn));
return -1;
}
}
ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, rec_mode);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].vnn));
@ -531,28 +609,33 @@ static void ban_handler(struct ctdb_context *ctdb, uint64_t srvid,
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
struct ctdb_ban_info *b = (struct ctdb_ban_info *)data.dptr;
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
uint32_t recmaster;
int ret;
if (data.dsize != sizeof(*b)) {
DEBUG(0,("Bad data in ban_handler\n"));
talloc_free(mem_ctx);
return;
}
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
talloc_free(mem_ctx);
return;
}
if (recmaster != ctdb->vnn) {
DEBUG(0,("We are not the recmaster - ignoring ban request\n"));
talloc_free(mem_ctx);
return;
}
DEBUG(0,("Node %u has been banned for %u seconds by the administrator\n",
b->vnn, b->ban_time));
ctdb_ban_node(rec, b->vnn, b->ban_time);
talloc_free(mem_ctx);
}
/*
@ -562,29 +645,34 @@ static void unban_handler(struct ctdb_context *ctdb, uint64_t srvid,
TDB_DATA data, void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
uint32_t vnn;
int ret;
uint32_t recmaster;
if (data.dsize != sizeof(uint32_t)) {
DEBUG(0,("Bad data in unban_handler\n"));
talloc_free(mem_ctx);
return;
}
vnn = *(uint32_t *)data.dptr;
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to find the recmaster\n"));
talloc_free(mem_ctx);
return;
}
if (recmaster != ctdb->vnn) {
DEBUG(0,("We are not the recmaster - ignoring unban request\n"));
talloc_free(mem_ctx);
return;
}
DEBUG(0,("Node %u has been unbanned by the administrator\n", vnn));
ctdb_unban_node(rec, vnn);
talloc_free(mem_ctx);
}
@ -1104,11 +1192,11 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
nodemap->nodes[i].flags = c->new_flags;
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(),
ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, CONTROL_TIMEOUT(),
CTDB_CURRENT_NODE, &ctdb->recovery_master);
if (ret == 0) {
ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(),
ret = ctdb_ctrl_getrecmode(ctdb, tmp_ctx, CONTROL_TIMEOUT(),
CTDB_CURRENT_NODE, &ctdb->recovery_mode);
}
@ -1139,12 +1227,192 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid,
/*
  state shared by all the getrecmode controls sent by verify_recmode()
*/
struct verify_recmode_normal_data {
uint32_t count; /* controls sent whose callback has not run yet */
enum monitor_result status; /* MONITOR_OK until a node fails the check */
};
/*
  completion callback for one async getrecmode control.
  a control that did not complete downgrades an OK result to
  MONITOR_FAILED; a node that reports anything but CTDB_RECOVERY_NORMAL
  sets MONITOR_RECOVERY_NEEDED
*/
static void verify_recmode_normal_callback(struct ctdb_client_control_state *state)
{
	struct verify_recmode_normal_data *tally;

	tally = talloc_get_type(state->async.private, struct verify_recmode_normal_data);

	/* this control is no longer outstanding */
	tally->count--;

	if (state->state == CTDB_CONTROL_DONE) {
		/* we got an answer; the recmode of the node is carried in
		   the status field
		*/
		if (state->status != CTDB_RECOVERY_NORMAL) {
			DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n", state->c->hdr.destnode));
			tally->status = MONITOR_RECOVERY_NEEDED;
		}
		return;
	}

	/* no usable answer. report a failure so the main loop tries again,
	   but never hide a more specific result recorded earlier
	*/
	if (tally->status == MONITOR_OK) {
		tally->status = MONITOR_FAILED;
	}
}
/* verify that all nodes are in normal recovery mode */
static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
struct verify_recmode_normal_data *rmdata;
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
struct ctdb_client_control_state *state;
enum monitor_result status;
int j;
rmdata = talloc(mem_ctx, struct verify_recmode_normal_data);
CTDB_NO_MEMORY_FATAL(ctdb, rmdata);
rmdata->count = 0;
rmdata->status = MONITOR_OK;
/* loop over all active nodes and send an async getrecmode call to
them*/
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx,
CONTROL_TIMEOUT(),
nodemap->nodes[j].vnn);
if (state == NULL) {
/* we failed to send the control, treat this as
an error and try again next iteration
*/
DEBUG(0,("Failed to call ctdb_ctrl_getrecmode_send during monitoring\n"));
talloc_free(mem_ctx);
return MONITOR_FAILED;
}
/* set up the callback functions */
state->async.fn = verify_recmode_normal_callback;
state->async.private = rmdata;
/* one more control to wait for to complete */
rmdata->count++;
}
/* now wait for up to the maximum number of seconds allowed
or until all nodes we expect a response from has replied
*/
while (rmdata->count > 0) {
event_loop_once(ctdb->ev);
}
status = rmdata->status;
talloc_free(mem_ctx);
return status;
}
/*
  state shared by all the getrecmaster controls sent by verify_recmaster()
*/
struct verify_recmaster_data {
uint32_t count; /* controls sent whose callback has not run yet */
uint32_t vnn; /* the vnn every node is expected to report as recmaster */
enum monitor_result status; /* MONITOR_OK until a node fails the check */
};
/*
  completion callback for one async getrecmaster control.
  a control that did not complete downgrades an OK result to
  MONITOR_FAILED; a node that reports a recmaster other than the
  expected vnn sets MONITOR_ELECTION_NEEDED
*/
static void verify_recmaster_callback(struct ctdb_client_control_state *state)
{
	struct verify_recmaster_data *tally;

	tally = talloc_get_type(state->async.private, struct verify_recmaster_data);

	/* this control is no longer outstanding */
	tally->count--;

	if (state->state == CTDB_CONTROL_DONE) {
		/* we got an answer; the recmaster the node believes in is
		   carried in the status field
		*/
		if (state->status != tally->vnn) {
			DEBUG(0,("Node %d does not agree we are the recmaster. Need a new recmaster election\n", state->c->hdr.destnode));
			tally->status = MONITOR_ELECTION_NEEDED;
		}
		return;
	}

	/* no usable answer. report a failure so the main loop tries again,
	   but never hide a more specific result recorded earlier
	*/
	if (tally->status == MONITOR_OK) {
		tally->status = MONITOR_FAILED;
	}
}
/* verify that all nodes agree that we are the recmaster */
static enum monitor_result verify_recmaster(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, uint32_t vnn)
{
struct verify_recmaster_data *rmdata;
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
struct ctdb_client_control_state *state;
enum monitor_result status;
int j;
rmdata = talloc(mem_ctx, struct verify_recmaster_data);
CTDB_NO_MEMORY_FATAL(ctdb, rmdata);
rmdata->count = 0;
rmdata->vnn = vnn;
rmdata->status = MONITOR_OK;
/* loop over all active nodes and send an async getrecmaster call to
them*/
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx,
CONTROL_TIMEOUT(),
nodemap->nodes[j].vnn);
if (state == NULL) {
/* we failed to send the control, treat this as
an error and try again next iteration
*/
DEBUG(0,("Failed to call ctdb_ctrl_getrecmaster_send during monitoring\n"));
talloc_free(mem_ctx);
return MONITOR_FAILED;
}
/* set up the callback functions */
state->async.fn = verify_recmaster_callback;
state->async.private = rmdata;
/* one more control to wait for to complete */
rmdata->count++;
}
/* now wait for up to the maximum number of seconds allowed
or until all nodes we expect a response from has replied
*/
while (rmdata->count > 0) {
event_loop_once(ctdb->ev);
}
status = rmdata->status;
talloc_free(mem_ctx);
return status;
}
/*
the main monitoring loop
*/
static void monitor_cluster(struct ctdb_context *ctdb)
{
uint32_t vnn, num_active, recmode, recmaster;
uint32_t vnn, num_active, recmaster;
TALLOC_CTX *mem_ctx=NULL;
struct ctdb_node_map *nodemap=NULL;
struct ctdb_node_map *remote_nodemap=NULL;
@ -1235,7 +1503,7 @@ again:
/* check which node is the recovery master */
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), vnn, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), vnn, &recmaster);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));
goto again;
@ -1276,49 +1544,35 @@ again:
/* verify that all active nodes agree that we are the recmaster */
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
if (nodemap->nodes[j].vnn == vnn) {
continue;
}
ret = ctdb_ctrl_getrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmaster);
if (ret != 0) {
DEBUG(0, (__location__ " Unable to get recmaster from node %u\n", vnn));
switch (verify_recmaster(ctdb, nodemap, vnn)) {
case MONITOR_RECOVERY_NEEDED:
/* can not happen */
goto again;
}
if (recmaster!=vnn) {
DEBUG(0, ("Node %u does not agree we are the recmaster. Force reelection\n",
nodemap->nodes[j].vnn));
case MONITOR_ELECTION_NEEDED:
force_election(rec, mem_ctx, vnn, nodemap);
goto again;
}
case MONITOR_OK:
break;
case MONITOR_FAILED:
goto again;
}
/* verify that all active nodes are in normal mode
and not in recovery mode
*/
for (j=0; j<nodemap->num; j++) {
if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) {
continue;
}
ret = ctdb_ctrl_getrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, &recmode);
if (ret != 0) {
DEBUG(0, ("Unable to get recmode from node %u\n", vnn));
goto again;
}
if (recmode != CTDB_RECOVERY_NORMAL) {
DEBUG(0, (__location__ " Node:%u was in recovery mode. Restart recovery process\n",
nodemap->nodes[j].vnn));
switch (verify_recmode(ctdb, nodemap)) {
case MONITOR_RECOVERY_NEEDED:
do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn);
goto again;
case MONITOR_FAILED:
goto again;
case MONITOR_ELECTION_NEEDED:
/* can not happen */
case MONITOR_OK:
break;
}
}
/* get the nodemap for all active remote nodes and verify

View File

@ -0,0 +1,189 @@
/*
ctdb_control protocol code to manage server ids
Copyright (C) Ronnie Sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../include/ctdb_private.h"
#include "../common/rb_tree.h"
#define SERVER_ID_KEY_SIZE 3
/*
  build the SERVER_ID_KEY_SIZE word tree key (type, vnn, server_id) for
  a server id.
  the key lives in a static buffer, so the returned pointer is only
  valid until the next call
*/
static uint32_t *get_server_id_key(struct ctdb_server_id *server_id)
{
	static uint32_t key[SERVER_ID_KEY_SIZE];
	uint32_t *word = key;

	*word++ = server_id->type;
	*word++ = server_id->vnn;
	*word   = server_id->server_id;

	return key;
}
/* insert callback used when adding a server_id to the tree.
   'parm' is the structure we want to add and 'data' is whatever the
   tree already holds for that key.
   if there is no existing entry we return parm so that it gets inserted.
   otherwise this is a duplicate: parm is freed and the existing entry
   is returned so that it stays in the tree.
*/
static void *add_server_id_callback(void *parm, void *data)
{
	if (data == NULL) {
		return parm;
	}

	talloc_free(parm);
	return data;
}
/*
  register a server id
  a server id that is registered with ctdb will be automatically
  unregistered once the client domain socket disappears.

  returns 0 on success and 1 if client_id does not refer to a
  struct ctdb_client
*/
int32_t ctdb_control_register_server_id(struct ctdb_context *ctdb, 
					 uint32_t client_id,
					 TDB_DATA indata)
{
	struct ctdb_client *owner;
	struct ctdb_server_id *server_id;

	owner = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	if (owner == NULL) {
		DEBUG(0,(__location__ " Could not find client parent structure. You can not send this control to a remote node\n"));
		return 1;
	}

	/* the server_id structure is allocated as a child of the client
	   before it is stored in the tree so that it is automatically
	   destroyed when the client is destroyed.
	   when the structure is freed it is automatically removed from
	   the tree
	*/
	server_id = talloc_zero(owner, struct ctdb_server_id);
	CTDB_NO_MEMORY(ctdb, server_id);
	memcpy(server_id, indata.dptr, sizeof(*server_id));

	/* a duplicate registration is dropped by add_server_id_callback */
	trbt_insertarray32_callback(ctdb->server_ids,
				    SERVER_ID_KEY_SIZE,
				    get_server_id_key(server_id),
				    add_server_id_callback,
				    server_id);

	return 0;
}
/*
  check whether a server id exists

  returns 1 if the server id described by indata is registered on this
  node and 0 if it is not.

  the looked-up pointer must not be returned cast to int32_t: on 64 bit
  platforms that truncates it, which can produce 0 for an existing
  entry or a negative value
*/
int32_t ctdb_control_check_server_id(struct ctdb_context *ctdb, 
				      TDB_DATA indata)
{
	struct ctdb_server_id *server_id = (struct ctdb_server_id *)indata.dptr;
	void *found;

	found = trbt_lookuparray32(ctdb->server_ids,
				   SERVER_ID_KEY_SIZE,
				   get_server_id_key(server_id));

	return (found != NULL) ? 1 : 0;
}
/*
  unregisters a server id
  the entry found for the id is freed. always returns 0, also when the
  id was not registered
*/
int32_t ctdb_control_unregister_server_id(struct ctdb_context *ctdb, 
					   TDB_DATA indata)
{
	struct ctdb_server_id *wanted = (struct ctdb_server_id *)indata.dptr;
	void *entry;

	entry = trbt_lookuparray32(ctdb->server_ids,
				   SERVER_ID_KEY_SIZE,
				   get_server_id_key(wanted));
	talloc_free(entry);

	return 0;
}
/*
  traverse state used when building the list of registered server ids
*/
struct count_server_ids {
int count; /* entries counted (first pass) or stored so far (second pass) */
struct ctdb_server_id_list *list; /* reply being filled in; unused while counting */
};
/*
  traverse callback: count one more entry in the server id tree.
  param is the struct count_server_ids being updated; data is not used
*/
static void server_id_count(void *param, void *data)
{
	struct count_server_ids *walk;

	walk = talloc_get_type(param, struct count_server_ids);
	if (walk != NULL) {
		walk->count++;
		return;
	}

	DEBUG(0, (__location__ " Got null pointer for svid\n"));
}
/*
  traverse callback: copy one entry of the server id tree into the next
  free slot of the reply list.
  param is the struct count_server_ids tracking the reply, data is the
  struct ctdb_server_id stored in the tree
*/
static void server_id_store(void *param, void *data)
{
	struct count_server_ids *walk = talloc_get_type(param, 
					struct count_server_ids);
	struct ctdb_server_id *entry = talloc_get_type(data, 
					struct ctdb_server_id);
	struct ctdb_server_id *slot;

	if (walk == NULL) {
		DEBUG(0, (__location__ " Got null pointer for svid\n"));
		return;
	}

	/* the list was sized from an earlier count; never write past it */
	if (walk->count >= walk->list->num) {
		DEBUG(0, (__location__ " size of server id tree changed during traverse\n"));
		return;
	}

	slot = &walk->list->server_ids[walk->count];
	memcpy(slot, entry, sizeof(*slot));
	walk->count += 1;
}
/*
  returns a list of all registered server ids for a node

  the tree is traversed twice: once to count the entries so the reply
  can be sized, and once to copy them into outdata as a
  struct ctdb_server_id_list.
  returns 0 on success
*/
int32_t ctdb_control_get_server_id_list(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	struct count_server_ids *svid;
	struct ctdb_server_id_list *reply;

	svid = talloc_zero(outdata, struct count_server_ids);
	CTDB_NO_MEMORY(ctdb, svid);

	/* pass 1: find out how many entries the reply must hold */
	trbt_traversearray32(ctdb->server_ids,
			     SERVER_ID_KEY_SIZE,
			     server_id_count,
			     svid);

	/* list header plus one struct ctdb_server_id per entry counted */
	outdata->dsize = offsetof(struct ctdb_server_id_list, server_ids)
			 + sizeof(struct ctdb_server_id) * svid->count;
	outdata->dptr  = talloc_size(outdata, outdata->dsize);
	CTDB_NO_MEMORY(ctdb, outdata->dptr);

	/* pass 2: copy the entries; count restarts at 0 and is used as
	   the index of the next slot to fill
	*/
	reply = (struct ctdb_server_id_list *)(outdata->dptr);
	reply->num  = svid->count;
	svid->list  = reply;
	svid->count = 0;
	trbt_traversearray32(ctdb->server_ids,
			     SERVER_ID_KEY_SIZE,
			     server_id_store,
			     svid);

	return 0;
}

View File

@ -234,6 +234,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
DEBUG(0,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
kill(state->child, SIGTERM);
waitpid(state->child, NULL, 0);
return 0;

View File

@ -217,7 +217,7 @@ int main(int argc, const char *argv[])
printf("Waiting for cluster\n");
while (1) {
uint32_t recmode=1;
ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
ctdb_ctrl_getrecmode(ctdb, ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
if (recmode == 0) break;
event_loop_once(ev);
}

View File

@ -232,7 +232,7 @@ int main(int argc, const char *argv[])
printf("Waiting for cluster\n");
while (1) {
uint32_t recmode=1;
ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
ctdb_ctrl_getrecmode(ctdb, ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
if (recmode == 0) break;
event_loop_once(ev);
}

View File

@ -145,7 +145,7 @@ int main(int argc, const char *argv[])
printf("Waiting for cluster\n");
while (1) {
uint32_t recmode=1;
ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
ctdb_ctrl_getrecmode(ctdb, ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
if (recmode == 0) break;
event_loop_once(ev);
}

View File

@ -292,14 +292,14 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
printf("hash:%d lmaster:%d\n", i, vnnmap->map[i]);
}
ret = ctdb_ctrl_getrecmode(ctdb, TIMELIMIT(), options.vnn, &recmode);
ret = ctdb_ctrl_getrecmode(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmode);
if (ret != 0) {
DEBUG(0, ("Unable to get recmode from node %u\n", options.vnn));
return ret;
}
printf("Recovery mode:%s (%d)\n",recmode==CTDB_RECOVERY_NORMAL?"NORMAL":"RECOVERY",recmode);
ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0, ("Unable to get recmaster from node %u\n", options.vnn));
return ret;
@ -373,6 +373,110 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
return -1;
}
/*
  register a server id
  arguments: <vnn> <type> <id>

  returns 0 on success, or the non-zero result of the register control
  on failure
 */
static int regsrvid(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int ret;
	struct ctdb_server_id server_id;

	if (argc < 3) {
		usage();
	}

	server_id.vnn       = strtoul(argv[0], NULL, 0);
	server_id.type      = strtoul(argv[1], NULL, 0);
	server_id.server_id = strtoul(argv[2], NULL, 0);

	ret = ctdb_ctrl_register_server_id(ctdb, TIMELIMIT(), &server_id);
	if (ret != 0) {
		DEBUG(0, ("Unable to register server_id from node %u\n", options.vnn));
		return ret;
	}

	/* the registration worked, so report success rather than -1 */
	return 0;
}
/*
  unregister a server id
  arguments: <vnn> <type> <id>

  returns 0 on success, or the non-zero result of the unregister control
  on failure
 */
static int unregsrvid(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int ret;
	struct ctdb_server_id server_id;

	if (argc < 3) {
		usage();
	}

	server_id.vnn       = strtoul(argv[0], NULL, 0);
	server_id.type      = strtoul(argv[1], NULL, 0);
	server_id.server_id = strtoul(argv[2], NULL, 0);

	ret = ctdb_ctrl_unregister_server_id(ctdb, TIMELIMIT(), &server_id);
	if (ret != 0) {
		DEBUG(0, ("Unable to unregister server_id from node %u\n", options.vnn));
		return ret;
	}

	/* the unregistration worked, so report success rather than -1 */
	return 0;
}
/*
  check if a server id exists
  arguments: <vnn> <type> <id>

  asks node options.vnn whether the server id is registered and prints
  the answer. returns 0 if the node could be asked, or the non-zero
  result of the check control on failure
 */
static int chksrvid(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t status;
	int ret;
	struct ctdb_server_id server_id;

	if (argc < 3) {
		usage();
	}

	server_id.vnn       = strtoul(argv[0], NULL, 0);
	server_id.type      = strtoul(argv[1], NULL, 0);
	server_id.server_id = strtoul(argv[2], NULL, 0);

	ret = ctdb_ctrl_check_server_id(ctdb, TIMELIMIT(), options.vnn, &server_id, &status);
	if (ret != 0) {
		DEBUG(0, ("Unable to check server_id from node %u\n", options.vnn));
		return ret;
	}

	/* the fields are unsigned 32 bit values, so print them as unsigned;
	   %d shows values above INT_MAX as negative numbers
	*/
	if (status) {
		printf("Server id %u:%u:%u EXISTS\n",
		       (unsigned)server_id.vnn,
		       (unsigned)server_id.type,
		       (unsigned)server_id.server_id);
	} else {
		printf("Server id %u:%u:%u does NOT exist\n",
		       (unsigned)server_id.vnn,
		       (unsigned)server_id.type,
		       (unsigned)server_id.server_id);
	}
	return 0;
}
/*
  get a list of all server ids that are registered on a node

  fetches the list from node options.vnn and prints one line per entry
  as vnn:type:id. returns 0 on success, or the non-zero result of the
  control on failure
 */
static int getsrvids(struct ctdb_context *ctdb, int argc, const char **argv)
{
	uint32_t i;
	int ret;
	struct ctdb_server_id_list *server_ids;

	ret = ctdb_ctrl_get_server_id_list(ctdb, ctdb, TIMELIMIT(), options.vnn, &server_ids);
	if (ret != 0) {
		DEBUG(0, ("Unable to get server_id list from node %u\n", options.vnn));
		return ret;
	}

	/* num is unsigned, so the index is unsigned too; the fields are
	   unsigned 32 bit values and are printed as such
	*/
	for (i=0; i<server_ids->num; i++) {
		printf("Server id %u:%u:%u\n", 
		       (unsigned)server_ids->server_ids[i].vnn,
		       (unsigned)server_ids->server_ids[i].type,
		       (unsigned)server_ids->server_ids[i].server_id);
	}

	/* the list was fetched and printed, so report success rather than -1 */
	return 0;
}
/*
send a tcp tickle ack
*/
@ -544,7 +648,7 @@ static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
ban_time = strtoul(argv[0], NULL, 0);
ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0,("Failed to find the recmaster\n"));
return -1;
@ -575,7 +679,7 @@ static int control_unban(struct ctdb_context *ctdb, int argc, const char **argv)
uint32_t recmaster;
TDB_DATA data;
ret = ctdb_ctrl_getrecmaster(ctdb, TIMELIMIT(), options.vnn, &recmaster);
ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.vnn, &recmaster);
if (ret != 0) {
DEBUG(0,("Failed to find the recmaster\n"));
return -1;
@ -964,6 +1068,11 @@ static const struct {
{ "killtcp", kill_tcp, false, "kill a tcp connection.", "<srcip:port> <dstip:port>" },
{ "tickle", tickle_tcp, false, "send a tcp tickle ack", "<srcip:port> <dstip:port>" },
{ "gettickles", control_get_tickles, false, "get the list of tickles registered for this vnn", "<vnn>" },
{ "regsrvid", regsrvid, false, "register a server id", "<vnn> <type> <id>" },
{ "unregsrvid", unregsrvid, false, "unregister a server id", "<vnn> <type> <id>" },
{ "chksrvid", chksrvid, false, "check if a server id exists", "<vnn> <type> <id>" },
{ "getsrvids", getsrvids, false, "get a list of all server ids"},
};
/*