1
0
mirror of https://github.com/samba-team/samba.git synced 2025-01-08 21:18:16 +03:00

Merge commit 'origin/master' into for-ronnie

(This used to be ctdb commit 666c3835376cd6b66aeaa110c76ecf052cd71a0a)
This commit is contained in:
Martin Schwenke 2008-09-12 11:26:25 +10:00
commit c1b622f359
44 changed files with 2678 additions and 1046 deletions

View File

@ -56,7 +56,8 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.c \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent bin/ctdb_traverse bin/rb_test \
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent \
bin/ctdb_traverse bin/rb_test bin/ctdb_transaction \
@INFINIBAND_BINS@
BINS = bin/ctdb @CTDB_SCSI_IO@ bin/ctdb_ipmux bin/smnotify
@ -141,6 +142,10 @@ bin/ctdb_persistent: $(CTDB_CLIENT_OBJ) tests/ctdb_persistent.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
bin/ctdb_transaction: $(CTDB_CLIENT_OBJ) tests/ctdb_transaction.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)

View File

@ -1231,29 +1231,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32
return 0;
}
/*
  get the reclock filename

  Sends CTDB_CONTROL_GET_RECLOCK_FILE to destnode. When both the call
  and the returned status are zero, the reply buffer is moved onto
  mem_ctx and handed back through *reclock.

  Returns 0 on success, -1 otherwise (*reclock is then left untouched).
 */
int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
			 TALLOC_CTX *mem_ctx, const char **reclock)
{
	TDB_DATA reply;
	int32_t status;
	int rc;

	rc = ctdb_control(ctdb, destnode, 0,
			  CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
			  mem_ctx, &reply, &status, &timeout, NULL);
	if (rc == 0 && status == 0) {
		*reclock = (const char *)talloc_steal(mem_ctx, reply.dptr);
		return 0;
	}

	DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n"));
	return -1;
}
/*
get a list of nodes (vnn and flags ) from a remote node
*/
@ -1787,6 +1764,11 @@ static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA
return;
}
if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
/* empty records are deleted records in ctdb */
return;
}
if (state->fn(ctdb, key, data, state->private_data) != 0) {
state->done = True;
}
@ -1847,6 +1829,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *
return state.count;
}
#define ISASCII(x) ((x>31)&&(x<128))
/*
called on each key during a catdb
*/
@ -1861,7 +1844,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
for (i=0;i<key.dsize;i++) {
if (isascii(key.dptr[i])) {
if (ISASCII(key.dptr[i])) {
fprintf(f, "%c", key.dptr[i]);
} else {
fprintf(f, "\\%02X", key.dptr[i]);
@ -1871,7 +1854,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi
fprintf(f, "data(%u) = \"", (unsigned)data.dsize);
for (i=sizeof(*h);i<data.dsize;i++) {
if (isascii(data.dptr[i])) {
if (ISASCII(data.dptr[i])) {
fprintf(f, "%c", data.dptr[i]);
} else {
fprintf(f, "\\%02X", data.dptr[i]);
@ -2439,15 +2422,15 @@ int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx,
struct sockaddr_in *ip,
ctdb_sock_addr *addr,
struct ctdb_control_tcp_tickle_list **list)
{
int ret;
TDB_DATA data, outdata;
int32_t status;
data.dptr = (uint8_t*)ip;
data.dsize = sizeof(struct sockaddr_in);
data.dptr = (uint8_t*)addr;
data.dsize = sizeof(ctdb_sock_addr);
ret = ctdb_control(ctdb, destnode, 0,
CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
@ -2947,3 +2930,371 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout,
talloc_free(tmp_ctx);
return ret;
}
/*
  client-side state of one transaction on a ctdb database
 */
struct ctdb_transaction_handle {
/* database the transaction operates on */
struct ctdb_db_context *ctdb_db;
/* set by ctdb_replay_transaction(); while set, fetches and stores are
   not appended to m_all again */
bool in_replay;
/* we record the reads and writes done under a transaction:
m_all holds both reads and writes (it is what gets replayed),
m_write holds just the writes (it is what commit sends to ctdbd)
*/
struct ctdb_marshall_buffer *m_all;
struct ctdb_marshall_buffer *m_write;
};
/*
  talloc destructor for a transaction handle: cancels the tdb transaction
  on the handle's local database. Always returns 0.
 */
static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
{
tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
return 0;
}
/*
  open the local tdb transaction for a transaction handle

  Only persistent databases are accepted. Each attempt fetch-locks the
  CTDB_TRANSACTION_LOCK_KEY record, drops that handle, starts a tdb
  transaction and then re-reads the lock record. If the re-read fails or
  this node is not the record's dmaster, the tdb transaction is
  cancelled and the whole sequence is tried again (there is no retry
  limit).

  Returns 0 with the tdb transaction open, or -1 on error.
 */
static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
{
	struct ctdb_db_context *ctdb_db = h->ctdb_db;
	const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
	TDB_DATA key;

	key.dptr = discard_const(keyname);
	key.dsize = strlen(keyname);

	if (!ctdb_db->persistent) {
		DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
		return -1;
	}

	for (;;) {
		struct ctdb_ltdb_header header;
		struct ctdb_record_handle *rh;
		TALLOC_CTX *tmp_ctx = talloc_new(h);
		int ret;

		rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
		if (rh == NULL) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
			talloc_free(tmp_ctx);
			return -1;
		}
		/* the fetch-lock handle is dropped before the tdb transaction starts */
		talloc_free(rh);

		ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
		if (ret != 0) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
			talloc_free(tmp_ctx);
			return -1;
		}

		ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL);
		if (ret == 0 && header.dmaster == ctdb_db->ctdb->pnn) {
			/* we still own the lock record inside the transaction */
			talloc_free(tmp_ctx);
			return 0;
		}

		/* lost the record between the lock and the transaction - retry */
		tdb_transaction_cancel(ctdb_db->ltdb->tdb);
		talloc_free(tmp_ctx);
	}
}
/*
  start a transaction on a database

  Allocates a zeroed handle on mem_ctx and opens the transaction via
  ctdb_transaction_fetch_start(). Once that has succeeded the handle
  gets ctdb_transaction_destructor as its talloc destructor.

  Returns the handle, or NULL on allocation failure or if the
  transaction could not be started.
 */
struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
						       TALLOC_CTX *mem_ctx)
{
	struct ctdb_transaction_handle *handle;

	handle = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
	if (handle == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
		return NULL;
	}
	handle->ctdb_db = ctdb_db;

	if (ctdb_transaction_fetch_start(handle) != 0) {
		/* no destructor installed yet, so this is a plain free */
		talloc_free(handle);
		return NULL;
	}

	talloc_set_destructor(handle, ctdb_transaction_destructor);
	return handle;
}
/*
  fetch a record inside a transaction

  The record is read with ctdb_ltdb_fetch() using mem_ctx. A result of
  -1 together with a dmaster of (uint32_t)-1 is treated as "record does
  not exist yet": *data becomes tdb_null and the fetch counts as a
  success. Any other non-zero result is returned unchanged.

  Outside of a replay the read (reqid 1) is appended to h->m_all.
  Returns 0 on success, -1 if that append fails.
 */
int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
			   TALLOC_CTX *mem_ctx,
			   TDB_DATA key, TDB_DATA *data)
{
	struct ctdb_ltdb_header hdr;
	int rc;

	ZERO_STRUCT(hdr);

	rc = ctdb_ltdb_fetch(h->ctdb_db, key, &hdr, mem_ctx, data);
	if (rc != 0) {
		if (rc != -1 || hdr.dmaster != (uint32_t)-1) {
			return rc;
		}
		/* record doesn't exist yet */
		*data = tdb_null;
	}

	if (h->in_replay) {
		/* replayed reads are already part of m_all */
		return 0;
	}

	h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
	if (h->m_all == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
		return -1;
	}

	return 0;
}
/*
  stores a record inside a transaction

  The current record is fetched first so its header (RSN) can be
  updated. A missing record (fetch returns -1 with dmaster
  (uint32_t)-1) is created with this node as dmaster. If the new data
  equals the existing data nothing is written. Otherwise the RSN is
  bumped, the write is appended to h->m_write (and, outside of a
  replay, to h->m_all with reqid 0) and stored in the local tdb.

  Returns 0 on success, otherwise the failing fetch/store result or -1
  when a marshalling buffer could not be extended.
 */
int ctdb_transaction_store(struct ctdb_transaction_handle *h,
			   TDB_DATA key, TDB_DATA data)
{
	TALLOC_CTX *tmp_ctx = talloc_new(h);
	struct ctdb_ltdb_header header;
	/* start from an empty blob so the comparison below never reads an
	   indeterminate value if the fetch fails without filling it in */
	TDB_DATA olddata = tdb_null;
	int ret;

	ZERO_STRUCT(header);

	/* we need the header so we can update the RSN */
	ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
	if (ret == -1 && header.dmaster == (uint32_t)-1) {
		/* the record doesn't exist - create one with us as dmaster.
		   This is only safe because we are in a transaction and this
		   is a persistent database */
		header.dmaster = h->ctdb_db->ctdb->pnn;
		header.rsn = 0;
	} else if (ret != 0) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
		talloc_free(tmp_ctx);
		return ret;
	}

	/* zero-length blobs compare equal without handing memcmp() a
	   possibly NULL pointer */
	if (data.dsize == olddata.dsize &&
	    (data.dsize == 0 ||
	     memcmp(data.dptr, olddata.dptr, data.dsize) == 0)) {
		/* save writing the same data */
		talloc_free(tmp_ctx);
		return 0;
	}

	header.rsn++;

	if (!h->in_replay) {
		h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
		if (h->m_all == NULL) {
			DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
			talloc_free(tmp_ctx);
			return -1;
		}
	}

	h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
	if (h->m_write == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
		talloc_free(tmp_ctx);
		return -1;
	}

	ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);

	talloc_free(tmp_ctx);

	return ret;
}
/*
  replay a transaction

  Marks the handle as replaying, throws away the collected writes and
  reopens the transaction. Every operation recorded in h->m_all is then
  run again: entries with reqid 0 are stores, all others are reads whose
  current value must be byte-identical to the recorded one.

  Returns 0 when everything replayed, the error from
  ctdb_transaction_fetch_start() if the transaction could not be
  reopened, or -1 (after cancelling the tdb transaction) otherwise.
 */
static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
{
	struct ctdb_rec_data *rec = NULL;
	int ret, i;

	h->in_replay = true;
	talloc_free(h->m_write);
	h->m_write = NULL;

	ret = ctdb_transaction_fetch_start(h);
	if (ret != 0) {
		return ret;
	}

	for (i=0;i<h->m_all->count;i++) {
		TDB_DATA key, data;

		rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
		if (rec == NULL) {
			DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
			goto failed;
		}

		if (rec->reqid != 0) {
			/* it's a read - the value must not have changed */
			TDB_DATA current;
			TALLOC_CTX *tmp_ctx = talloc_new(h);
			bool unchanged;

			if (ctdb_transaction_fetch(h, tmp_ctx, key, &current) != 0) {
				talloc_free(tmp_ctx);
				goto failed;
			}
			unchanged = (current.dsize == data.dsize &&
				     memcmp(current.dptr, data.dptr, data.dsize) == 0);
			talloc_free(tmp_ctx);
			if (!unchanged) {
				/* the record has changed on us - we have to give up */
				goto failed;
			}
		} else {
			/* it's a store */
			if (ctdb_transaction_store(h, key, data) != 0) {
				goto failed;
			}
		}
	}

	return 0;

failed:
	tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
	return -1;
}
/*
commit a transaction

The handle h is freed on every return path, so the caller must not use
it afterwards.

Returns 0 when the commit succeeded or when the transaction contains no
writes, -1 when the commit was abandoned (10 failed attempts, or the
replay failed), or the tdb_transaction_commit() result when the final
local commit fails.
*/
int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
{
int ret, retries=0;
int32_t status;
struct ctdb_context *ctdb = h->ctdb_db->ctdb;
struct timeval timeout;
enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
/* remove the destructor: from here on the tdb transaction is cancelled
   or committed explicitly rather than by talloc_free(h) */
talloc_set_destructor(h, NULL);
/* our commit strategy is quite complex.
- we first try to commit the changes to all other nodes
- if that works, then we commit locally and we are done
- if a commit on another node fails, then we need to cancel
the transaction, then restart the transaction (thus
opening a window of time for a pending recovery to
complete), then replay the transaction, checking all the
reads and writes (checking that reads give the same data,
and writes succeed). Then we retry the transaction to the
other nodes
*/
again:
if (h->m_write == NULL) {
/* no changes were made */
tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
talloc_free(h);
return 0;
}
/* tell ctdbd to commit to the other nodes */
/* the first attempt uses TRANS2_COMMIT, later ones TRANS2_COMMIT_RETRY;
   each attempt gets a one second timeout */
timeout = timeval_current_ofs(1, 0);
ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
&timeout, NULL);
if (ret != 0 || status != 0) {
/* the attempt failed: drop the local transaction and wait a second
   before deciding whether to replay or give up */
tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
sleep(1);
if (ret != 0) {
failure_control = CTDB_CONTROL_TRANS2_ERROR;
} else {
/* work out what error code we will give if we
have to fail the operation */
/* NOTE(review): there is no default case, so a status outside this
   enum leaves failure_control at its previous value */
switch ((enum ctdb_trans2_commit_error)status) {
case CTDB_TRANS2_COMMIT_SUCCESS:
case CTDB_TRANS2_COMMIT_SOMEFAIL:
case CTDB_TRANS2_COMMIT_TIMEOUT:
failure_control = CTDB_CONTROL_TRANS2_ERROR;
break;
case CTDB_TRANS2_COMMIT_ALLFAIL:
failure_control = CTDB_CONTROL_TRANS2_FINISHED;
break;
}
}
/* give up after the 10th failed attempt and report failure_control
   to ctdbd without waiting for a reply */
if (++retries == 10) {
DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
h->ctdb_db->db_id, retries, (unsigned)failure_control));
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
failure_control, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return -1;
}
/* reopen the transaction and re-run the recorded reads and writes;
   this rebuilds h->m_write for the next attempt */
if (ctdb_replay_transaction(h) != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n"));
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
failure_control, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return -1;
}
goto again;
} else {
failure_control = CTDB_CONTROL_TRANS2_ERROR;
}
/* do the real commit locally */
ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n"));
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
failure_control, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return ret;
}
/* tell ctdbd that we are finished with our local commit */
ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h);
return 0;
}
/*
  recovery daemon ping to main daemon

  Sends CTDB_CONTROL_RECD_PING to the current node with no timeout.
  Returns 0 when both the call and the returned status are zero,
  -1 otherwise.
 */
int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
{
	int32_t status;
	int rc;

	rc = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
			  ctdb, NULL, &status, NULL, NULL);
	if (rc == 0 && status == 0) {
		return 0;
	}

	DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
	return -1;
}

View File

@ -157,65 +157,6 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
return ret;
}
/*
  write a record to a persistent database
  this is done by a child process

  The stored record is the ltdb header followed by the data. When
  CTDB_FLAG_TORTURE is set, the existing record is read first and an
  RSN that would go backwards is logged. Unless the database was opened
  with TDB_NOSYNC the store is wrapped in its own tdb transaction.

  Returns the result of the last tdb operation (0 on success).
 */
int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
		    struct ctdb_ltdb_header *header, TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;

	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;
		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/* require a complete header (not merely pointer-size bytes)
		   before h2->rsn is read */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(DEBUG_CRIT,("RSN regression! %llu %llu\n",
				 (unsigned long long)h2->rsn, (unsigned long long)header->rsn));
		}
		if (rec.dptr) free(rec.dptr);
	}

	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);

	/* if this is a persistent database without NOSYNC then we
	   will do this via a transaction */
	if (!(ctdb_db->client_tdb_flags & TDB_NOSYNC)) {
		ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
		if (ret != 0) {
			DEBUG(DEBUG_ERR, (__location__ " Failed to start local transaction\n"));
			goto failed;
		}
		ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
		if (ret != 0) {
			DEBUG(DEBUG_ERR, (__location__ " Failed to store persistent data\n"));
			tdb_transaction_cancel(ctdb_db->ltdb->tdb);
			goto failed;
		}
		ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
		if (ret != 0) {
			DEBUG(DEBUG_ERR, (__location__ " Failed to commit persistent store transaction.\n"));
			tdb_transaction_cancel(ctdb_db->ltdb->tdb);
			goto failed;
		}
	} else {
		ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	}

failed:
	/* reached on success as well: the temporary record is always released */
	talloc_free(rec.dptr);

	return ret;
}
/*
lock a record in the ltdb, given a key
*/

View File

@ -147,7 +147,7 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty
p = _idr_find_type(ctdb->idr, (reqid>>16)&0xFFFF, type, location);
if (p == NULL) {
DEBUG(DEBUG_ERR, ("Could not find idr:%u\n",reqid));
DEBUG(DEBUG_WARNING, ("Could not find idr:%u\n",reqid));
}
return p;
@ -200,6 +200,106 @@ struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
return d;
}
/*
  helper function for marshalling multiple records

  Marshalls one record (reqid, key, optional header, data) and appends
  it to the buffer m, bumping m->count. Passing m == NULL starts a new
  buffer for db_id.

  Returns the (possibly moved) buffer. On failure NULL is returned and
  the buffer that was passed in has been freed, so the caller must not
  use it again.
 */
struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
					       struct ctdb_marshall_buffer *m,
					       uint64_t db_id,
					       uint32_t reqid,
					       TDB_DATA key,
					       struct ctdb_ltdb_header *header,
					       TDB_DATA data)
{
	struct ctdb_rec_data *r;
	size_t m_size, r_size;
	struct ctdb_marshall_buffer *m2;

	r = ctdb_marshall_record(mem_ctx, reqid, key, header, data);
	if (r == NULL) {
		talloc_free(m);
		return NULL;
	}

	if (m == NULL) {
		m = talloc_zero_size(mem_ctx, offsetof(struct ctdb_marshall_buffer, data));
		if (m == NULL) {
			/* don't leave the temporary record behind on mem_ctx */
			talloc_free(r);
			return NULL;
		}
		m->db_id = db_id;
	}

	m_size = talloc_get_size(m);
	r_size = talloc_get_size(r);

	m2 = talloc_realloc_size(mem_ctx, m,  m_size + r_size);
	if (m2 == NULL) {
		/* don't leave the temporary record behind on mem_ctx */
		talloc_free(r);
		talloc_free(m);
		return NULL;
	}

	/* the new record goes directly after the existing contents */
	memcpy(m_size + (uint8_t *)m2, r, r_size);
	talloc_free(r);
	m2->count++;

	return m2;
}
/*
  we've finished marshalling, return a data blob with the marshalled records

  The blob points at the buffer itself (nothing is copied) and its size
  is the talloc size of the buffer.
 */
TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
{
	TDB_DATA blob;

	blob.dsize = talloc_get_size(m);
	blob.dptr = (uint8_t *)m;

	return blob;
}
/*
   loop over a marshalling buffer

     - pass r==NULL to start
     - loop the number of times indicated by m->count

   Each of reqid, header, key and data may be NULL when the caller does
   not need that part. key and data point into the buffer. When header
   is requested, the record's data area is taken to start with a
   struct ctdb_ltdb_header, which is copied out and skipped in *data.

   Returns the record that was decoded (to be passed back as r on the
   next call), or NULL if a header was requested but the record is too
   short to hold one; no output is written in that case.

   The record is not checked against the end of the buffer, so the
   caller must stay within m->count iterations.
*/
struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
					      uint32_t *reqid,
					      struct ctdb_ltdb_header *header,
					      TDB_DATA *key, TDB_DATA *data)
{
	if (r == NULL) {
		r = (struct ctdb_rec_data *)&m->data[0];
	} else {
		r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
	}

	/* validate first, so that a short record never produces an
	   underflowed data->dsize */
	if (header != NULL && r->datalen < sizeof(*header)) {
		return NULL;
	}

	if (reqid != NULL) {
		*reqid = r->reqid;
	}

	if (key != NULL) {
		key->dptr   = &r->data[0];
		key->dsize  = r->keylen;
	}
	if (data != NULL) {
		data->dptr  = &r->data[r->keylen];
		data->dsize = r->datalen;
		if (header != NULL) {
			data->dptr += sizeof(*header);
			data->dsize -= sizeof(*header);
		}
	}

	if (header != NULL) {
		*header = *(struct ctdb_ltdb_header *)&r->data[r->keylen];
	}

	return r;
}
#if HAVE_SCHED_H
#include <sched.h>
#endif
@ -262,40 +362,6 @@ void set_close_on_exec(int fd)
}
/*
  parse a ip:num pair with the given separator

  s is split at the first occurrence of sep. The part after it must be
  a decimal number with nothing following it and is stored in *num; the
  part before it (at most 15 characters) must be accepted by
  inet_aton() and is stored in *addr.

  Returns true on success. *num may already have been written when
  false is returned.
 */
static bool parse_ip_num(const char *s, struct in_addr *addr, unsigned *num, const char sep)
{
	char ipbuf[16];
	char *tail = NULL;
	const char *sep_pos = strchr(s, sep);

	if (sep_pos == NULL || sep_pos - s > 15) {
		return false;
	}

	*num = strtoul(sep_pos+1, &tail, 10);
	if (tail == NULL || *tail != 0) {
		/* trailing garbage */
		return false;
	}

	/* copy just the address part, including room for the terminator */
	strlcpy(ipbuf, s, 1+sep_pos-s);

	return inet_aton(ipbuf, addr) != 0;
}
static bool parse_ipv4(const char *s, unsigned port, ctdb_sock_addr *saddr)
{
saddr->ip.sin_family = AF_INET;
@ -392,45 +458,91 @@ bool parse_ip(const char *addr, ctdb_sock_addr *saddr)
/*
parse a ip/mask pair
*/
bool parse_ip_mask(const char *s, struct sockaddr_in *ip, unsigned *mask)
bool parse_ip_mask(const char *str, ctdb_sock_addr *addr, unsigned *mask)
{
ZERO_STRUCT(*ip);
TALLOC_CTX *tmp_ctx = talloc_new(NULL);
char *s, *p;
char *endp = NULL;
bool ret;
if (!parse_ip_num(s, &ip->sin_addr, mask, '/')) {
ZERO_STRUCT(*addr);
s = talloc_strdup(tmp_ctx, str);
if (s == NULL) {
DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n"));
talloc_free(tmp_ctx);
return false;
}
if (*mask > 32) {
p = rindex(s, '/');
if (p == NULL) {
DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a mask\n", s));
talloc_free(tmp_ctx);
return false;
}
ip->sin_family = AF_INET;
ip->sin_port = 0;
return true;
*mask = strtoul(p+1, &endp, 10);
if (endp == NULL || *endp != 0) {
/* trailing garbage */
DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the mask in %s\n", s));
talloc_free(tmp_ctx);
return false;
}
*p = 0;
/* now is this a ipv4 or ipv6 address ?*/
p = index(s, ':');
if (p == NULL) {
ret = parse_ipv4(s, 0, addr);
} else {
ret = parse_ipv6(s, 0, addr);
}
talloc_free(tmp_ctx);
return ret;
}
/*
compare two sockaddr_in structures - matching only on IP
*/
bool ctdb_same_ipv4(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2)
This is used to canonicalize a ctdb_sock_addr structure.
*/
void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
{
return ip1->sin_family == ip2->sin_family &&
ip1->sin_addr.s_addr == ip2->sin_addr.s_addr;
char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
memcpy(cip, ip, sizeof (*cip));
if ( (ip->sa.sa_family == AF_INET6)
&& !memcmp(&ip->ip6.sin6_addr, prefix, 12)) {
memset(cip, 0, sizeof(*cip));
#ifdef HAVE_SOCK_SIN_LEN
cip->ip.sin_len = sizeof(*cip);
#endif
cip->ip.sin_family = AF_INET;
cip->ip.sin_port = ip->ip6.sin6_port;
memcpy(&cip->ip.sin_addr, &ip->ip6.sin6_addr.s6_addr32[3], 4);
}
}
bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
{
if (ip1->sa.sa_family != ip2->sa.sa_family) {
ctdb_sock_addr ip1, ip2;
ctdb_canonicalize_ip(tip1, &ip1);
ctdb_canonicalize_ip(tip2, &ip2);
if (ip1.sa.sa_family != ip2.sa.sa_family) {
return false;
}
switch (ip1->sa.sa_family) {
switch (ip1.sa.sa_family) {
case AF_INET:
return ip1->ip.sin_addr.s_addr == ip2->ip.sin_addr.s_addr;
return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr;
case AF_INET6:
return !memcmp(&ip1->ip6.sin6_addr.s6_addr[0],
&ip2->ip6.sin6_addr.s6_addr[0],
return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0],
&ip2.ip6.sin6_addr.s6_addr[0],
16);
default:
DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1->sa.sa_family));
DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family));
return false;
}
@ -438,13 +550,30 @@ bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
}
/*
compare two sockaddr_in structures
compare two ctdb_sock_addr structures
*/
bool ctdb_same_sockaddr(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2)
bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
{
return ctdb_same_ipv4(ip1, ip2) && ip1->sin_port == ip2->sin_port;
return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port;
}
/*
  format the address part of a ctdb_sock_addr as text

  The result lives in a static buffer that the next call overwrites.
  For a family other than AF_INET/AF_INET6 an error is logged and the
  buffer is returned with whatever it held before.
 */
char *ctdb_addr_to_str(ctdb_sock_addr *addr)
{
	static char text[128] = "";

	if (addr->sa.sa_family == AF_INET) {
		inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, text, sizeof(text));
	} else if (addr->sa.sa_family == AF_INET6) {
		inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, text, sizeof(text));
	} else {
		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
	}

	return text;
}
void ctdb_block_signal(int signum)

View File

@ -194,17 +194,17 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
we try to bind to it, and if that fails then we don't have that IP
on an interface
*/
bool ctdb_sys_have_ip(struct sockaddr_in ip)
bool ctdb_sys_have_ip(ctdb_sock_addr *addr)
{
int s;
int ret;
ip.sin_port = 0;
s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
addr->ip.sin_port = 0;
s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
if (s == -1) {
return false;
}
ret = bind(s, (struct sockaddr *)&ip, sizeof(ip));
ret = bind(s, (struct sockaddr *)addr, sizeof(ctdb_sock_addr));
close(s);
return ret == 0;
}
@ -306,12 +306,13 @@ static int aix_get_mac_addr(const char *device_name, uint8_t mac[6])
}
int ctdb_sys_read_tcp_packet(int s, void *private_data,
struct sockaddr_in *src, struct sockaddr_in *dst,
ctdb_sock_addr *src, ctdb_sock_addr *dst,
uint32_t *ack_seq, uint32_t *seq)
{
int ret;
struct ether_header *eth;
struct ip *ip;
struct ip6_hdr *ip6;
struct tcphdr *tcp;
struct ctdb_killtcp_connection *conn;
struct pcap_pkthdr pkthdr;
@ -326,44 +327,75 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data,
/* Ethernet */
eth = (struct ether_header *)buffer;
/* We are only interested in IP packets */
if (eth->ether_type != htons(ETHERTYPE_IP)) {
return -1;
}
/* we want either IPv4 or IPv6 */
if (eth->ether_type == htons(ETHERTYPE_IP)) {
/* IP */
ip = (struct ip *)(eth+1);
/* IP */
ip = (struct ip *)(eth+1);
/* We only want IPv4 packets */
if (ip->ip_v != 4) {
return -1;
}
/* Dont look at fragments */
if ((ntohs(ip->ip_off)&0x1fff) != 0) {
return -1;
}
/* we only want TCP */
if (ip->ip_p != IPPROTO_TCP) {
return -1;
}
/* We only want IPv4 packets */
if (ip->ip_v != 4) {
return -1;
}
/* Dont look at fragments */
if ((ntohs(ip->ip_off)&0x1fff) != 0) {
return -1;
}
/* we only want TCP */
if (ip->ip_p != IPPROTO_TCP) {
return -1;
}
/* make sure its not a short packet */
if (offsetof(struct tcphdr, th_ack) + 4 +
(ip->ip_hl*4) > ret) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
/* make sure its not a short packet */
if (offsetof(struct tcphdr, th_ack) + 4 +
(ip->ip_hl*4) > ret) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip);
/* tell the caller which one we've found */
src->sin_addr.s_addr = ip->ip_src.s_addr;
src->sin_port = tcp->th_sport;
dst->sin_addr.s_addr = ip->ip_dst.s_addr;
dst->sin_port = tcp->th_dport;
*ack_seq = tcp->th_ack;
*seq = tcp->th_seq;
/* tell the caller which one we've found */
src->ip.sin_family = AF_INET;
src->ip.sin_addr.s_addr = ip->ip_src.s_addr;
src->ip.sin_port = tcp->th_sport;
dst->ip.sin_family = AF_INET;
dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr;
dst->ip.sin_port = tcp->th_dport;
*ack_seq = tcp->th_ack;
*seq = tcp->th_seq;
return 0;
return 0;
#ifndef ETHERTYPE_IP6
#define ETHERTYPE_IP6 0x86dd
#endif
} else if (eth->ether_type == htons(ETHERTYPE_IP6)) {
/* IP6 */
ip6 = (struct ip6_hdr *)(eth+1);
/* we only want TCP */
if (ip6->ip6_nxt != IPPROTO_TCP) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)(ip6+1);
/* tell the caller which one we've found */
src->ip6.sin6_family = AF_INET6;
src->ip6.sin6_port = tcp->th_sport;
src->ip6.sin6_addr = ip6->ip6_src;
dst->ip6.sin6_family = AF_INET6;
dst->ip6.sin6_port = tcp->th_dport;
dst->ip6.sin6_addr = ip6->ip6_dst;
*ack_seq = tcp->th_ack;
*seq = tcp->th_seq;
return 0;
}
return -1;
}

View File

@ -26,9 +26,58 @@
#include "lib/events/events.h"
#include <netinet/if_ether.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/if_arp.h>
#ifndef ETHERTYPE_IP6
#define ETHERTYPE_IP6 0x86dd
#endif
/*
  uint16 checksum for n bytes

  Adds up the buffer as a sequence of 16-bit words in network byte
  order and returns the unfolded 32-bit sum. An odd trailing byte is
  counted as the high byte of a word whose low byte is zero.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	uint32_t sum=0;
	while (n>=2) {
		sum += (uint32_t)ntohs(*data);
		data++;
		n -= 2;
	}
	if (n == 1) {
		/* shift explicitly: ntohs() on a lone byte only yields the
		   high-byte value on little-endian hosts */
		sum += (uint32_t)(*(uint8_t *)data) << 8;
	}
	return sum;
}
/*
  calculate the tcp checksum for tcp over ipv6

  The sum covers the source and destination addresses of ip6, a
  pseudo-header made of the length n and the next-header value, and the
  n bytes at data. The folded sum is converted with htons() and
  inverted; a result of 0 is returned as 0xFFFF.
 */
static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint32_t pseudo[2];
	uint32_t acc;
	uint16_t folded;

	/* length and next-header, each as a 32-bit network-order field */
	pseudo[0] = htonl(n);
	pseudo[1] = htonl(ip6->ip6_nxt);

	acc  = uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	acc += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);
	acc += uint16_checksum((uint16_t *)pseudo, 8);
	acc += uint16_checksum(data, n);

	/* fold the carries back in, twice */
	acc = (acc & 0xFFFF) + (acc >> 16);
	acc = (acc & 0xFFFF) + (acc >> 16);

	folded = htons(acc);
	folded = ~folded;

	return (folded == 0) ? 0xFFFF : folded;
}
/*
send gratuitous arp reply after we have taken over an ip address
@ -42,8 +91,10 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
struct sockaddr sa;
struct ether_header *eh;
struct arphdr *ah;
struct ip6_hdr *ip6;
struct icmp6_hdr *icmp6;
struct ifreq if_hwaddr;
unsigned char buffer[64]; /*minimum eth frame size */
unsigned char buffer[78]; /* ipv6 neigh solicitation size */
char *ptr;
ZERO_STRUCT(sa);
@ -129,10 +180,68 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
return -1;
}
close(s);
break;
case AF_INET6:
s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_IP6));
if (s == -1){
DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n"));
return -1;
}
/* get the mac address */
strcpy(if_hwaddr.ifr_name, iface);
ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
if ( ret < 0 ) {
close(s);
DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n"));
return -1;
}
if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n"));
close(s);
return 0;
}
if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
close(s);
errno = EINVAL;
DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n",
if_hwaddr.ifr_hwaddr.sa_family));
return -1;
}
memset(buffer, 0 , sizeof(buffer));
eh = (struct ether_header *)buffer;
memset(eh->ether_dhost, 0xff, ETH_ALEN);
memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
eh->ether_type = htons(ETHERTYPE_IP6);
ip6 = (struct ip6_hdr *)(eh+1);
ip6->ip6_vfc = 0x60;
ip6->ip6_plen = htons(24);
ip6->ip6_nxt = IPPROTO_ICMPV6;
ip6->ip6_hlim = 255;
ip6->ip6_dst = addr->ip6.sin6_addr;
icmp6 = (struct icmp6_hdr *)(ip6+1);
icmp6->icmp6_type = ND_NEIGHBOR_SOLICIT;
icmp6->icmp6_code = 0;
memcpy(&icmp6->icmp6_data32[1], &addr->ip6.sin6_addr, 16);
icmp6->icmp6_cksum = tcp_checksum6((uint16_t *)icmp6, ntohs(ip6->ip6_plen), ip6);
strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
ret = sendto(s, buffer, 78, 0, &sa, sizeof(sa));
if (ret < 0 ){
close(s);
DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n"));
return -1;
}
close(s);
break;
default:
DEBUG(DEBUG_CRIT,(__location__ " not an ipv4 address (family is %u)\n", addr->ip.sin_family));
DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family));
return -1;
}
@ -140,23 +249,6 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface)
}
/*
  uint16 checksum for n bytes

  Adds up the buffer as a sequence of 16-bit words in network byte
  order and returns the unfolded 32-bit sum. An odd trailing byte is
  counted as the high byte of a word whose low byte is zero.
 */
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	uint32_t sum=0;
	while (n>=2) {
		sum += (uint32_t)ntohs(*data);
		data++;
		n -= 2;
	}
	if (n == 1) {
		/* shift explicitly: ntohs() on a lone byte only yields the
		   high-byte value on little-endian hosts */
		sum += (uint32_t)(*(uint8_t *)data) << 8;
	}
	return sum;
}
/*
simple TCP checksum - assumes data is multiple of 2 bytes long
*/
@ -179,29 +271,6 @@ static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
return sum2;
}
/*
  calculate the tcp checksum for tcp over ipv6

  The sum covers the source and destination addresses of ip6, a
  pseudo-header made of the length n and the next-header value, and the
  n bytes at data. The pseudo-header fields go through
  uint16_checksum() like everything else, so the sum no longer mixes in
  the raw (network byte order) ip6_plen field.

  The folded sum is converted with htons() and inverted; a result of 0
  is returned as 0xFFFF.
 */
static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6)
{
	uint32_t phdr[2];
	uint32_t sum = uint16_checksum(data, n);
	uint16_t sum2;

	sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16);
	sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16);

	/* length and next-header, each as a 32-bit network-order field */
	phdr[0] = htonl(n);
	phdr[1] = htonl(ip6->ip6_nxt);
	sum += uint16_checksum((uint16_t *)phdr, 8);

	sum = (sum & 0xFFFF) + (sum >> 16);
	sum = (sum & 0xFFFF) + (sum >> 16);
	sum2 = htons(sum);
	sum2 = ~sum2;
	if (sum2 == 0) {
		return 0xFFFF;
	}
	return sum2;
}
/*
Send tcp segment from the specified IP/port to the specified
destination IP/port.
@ -285,7 +354,7 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
case AF_INET6:
ZERO_STRUCT(ip6pkt);
ip6pkt.ip6.ip6_vfc = 0x60;
ip6pkt.ip6.ip6_plen = 20;
ip6pkt.ip6.ip6_plen = htons(20);
ip6pkt.ip6.ip6_nxt = IPPROTO_TCP;
ip6pkt.ip6.ip6_hlim = 64;
ip6pkt.ip6.ip6_src = src->ip6.sin6_addr;
@ -344,17 +413,17 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
ifname, if non-NULL, will return the name of the interface this ip is tied to
*/
bool ctdb_sys_have_ip(struct sockaddr_in ip)
bool ctdb_sys_have_ip(ctdb_sock_addr *addr)
{
int s;
int ret;
ip.sin_port = 0;
s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
addr->ip.sin_port = 0;
s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
if (s == -1) {
return false;
}
ret = bind(s, (struct sockaddr *)&ip, sizeof(ip));
ret = bind(s, (struct sockaddr *)addr, sizeof(ctdb_sock_addr));
close(s);
return ret == 0;
@ -395,7 +464,7 @@ int ctdb_sys_close_capture_socket(void *private_data)
called when the raw socket becomes readable
*/
int ctdb_sys_read_tcp_packet(int s, void *private_data,
struct sockaddr_in *src, struct sockaddr_in *dst,
ctdb_sock_addr *src, ctdb_sock_addr *dst,
uint32_t *ack_seq, uint32_t *seq)
{
int ret;
@ -403,6 +472,7 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data,
char pkt[RCVPKTSIZE];
struct ether_header *eth;
struct iphdr *ip;
struct ip6_hdr *ip6;
struct tcphdr *tcp;
ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
@ -413,45 +483,71 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data,
/* Ethernet */
eth = (struct ether_header *)pkt;
/* We only want IP packets */
if (ntohs(eth->ether_type) != ETHERTYPE_IP) {
return -1;
}
/* IP */
ip = (struct iphdr *)(eth+1);
/* we want either IPv4 or IPv6 */
if (ntohs(eth->ether_type) == ETHERTYPE_IP) {
/* IP */
ip = (struct iphdr *)(eth+1);
/* We only want IPv4 packets */
if (ip->version != 4) {
return -1;
}
/* Dont look at fragments */
if ((ntohs(ip->frag_off)&0x1fff) != 0) {
return -1;
}
/* we only want TCP */
if (ip->protocol != IPPROTO_TCP) {
return -1;
/* We only want IPv4 packets */
if (ip->version != 4) {
return -1;
}
/* Dont look at fragments */
if ((ntohs(ip->frag_off)&0x1fff) != 0) {
return -1;
}
/* we only want TCP */
if (ip->protocol != IPPROTO_TCP) {
return -1;
}
/* make sure its not a short packet */
if (offsetof(struct tcphdr, ack_seq) + 4 +
(ip->ihl*4) + sizeof(*eth) > ret) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
/* tell the caller which one we've found */
src->ip.sin_family = AF_INET;
src->ip.sin_addr.s_addr = ip->saddr;
src->ip.sin_port = tcp->source;
dst->ip.sin_family = AF_INET;
dst->ip.sin_addr.s_addr = ip->daddr;
dst->ip.sin_port = tcp->dest;
*ack_seq = tcp->ack_seq;
*seq = tcp->seq;
return 0;
} else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) {
/* IP6 */
ip6 = (struct ip6_hdr *)(eth+1);
/* we only want TCP */
if (ip6->ip6_nxt != IPPROTO_TCP) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)(ip6+1);
/* tell the caller which one we've found */
src->ip6.sin6_family = AF_INET6;
src->ip6.sin6_port = tcp->source;
src->ip6.sin6_addr = ip6->ip6_src;
dst->ip6.sin6_family = AF_INET6;
dst->ip6.sin6_port = tcp->dest;
dst->ip6.sin6_addr = ip6->ip6_dst;
*ack_seq = tcp->ack_seq;
*seq = tcp->seq;
return 0;
}
/* make sure its not a short packet */
if (offsetof(struct tcphdr, ack_seq) + 4 +
(ip->ihl*4) + sizeof(*eth) > ret) {
return -1;
}
/* TCP */
tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);
/* tell the caller which one we've found */
src->sin_addr.s_addr = ip->saddr;
src->sin_port = tcp->source;
dst->sin_addr.s_addr = ip->daddr;
dst->sin_port = tcp->dest;
*ack_seq = tcp->ack_seq;
*seq = tcp->seq;
return 0;
return -1;
}

View File

@ -1,21 +1,21 @@
#!/bin/sh
#
##############################
# init info for redhat distros
# chkconfig: - 90 36
# description: Starts and stops the clustered tdb daemon
# pidfile: /var/run/ctdbd/ctdbd.pid
##############################
# ctdb: Starts the clustered tdb daemon
#
# chkconfig: - 90 36
#
# description: Starts and stops the clustered tdb daemon
# pidfile: /var/run/ctdbd/ctdbd.pid
#
##############################
# SLES/OpenSuSE init info
### BEGIN INIT INFO
# Provides: ctdb
# Required-Start: $network
# Provides: ctdb
# Required-Start: $network
# Required-Stop:
# Default-Start: 3 5
# Default-Stop:
# Description: initscript for the ctdb service
# Short-Description: start and stop ctdb service
# Description: initscript for the ctdb service
### END INIT INFO
# Source function library.
@ -188,7 +188,7 @@ case "$1" in
stop)
stop
;;
restart)
restart|reload)
restart
;;
status)

View File

@ -47,6 +47,13 @@
# since checking each one of them might take a long time.
# CTDB_SAMBA_SKIP_SHARE_CHECK="yes"
# should we skip checking smb.conf for consistency
# CTDB_SAMBA_SKIP_CONF_CHECK="yes"
# specify which ports we should check that there is a daemon listening to
# by default we use testparm and look in smb.conf to figure out.
# CTDB_SAMBA_CHECK_PORTS="445"
# should ctdb manage starting/stopping Winbind service?
# if left commented out then it will be autodetected based on smb.conf
# CTDB_MANAGES_WINBIND=yes

View File

@ -59,7 +59,6 @@ case $cmd in
}
/sbin/ip addr add $ip/$maskbits dev $iface || {
echo "Failed to add $ip/$maskbits on dev $iface"
exit 1
}
# cope with the script being killed while we have the interface blocked
iptables -D INPUT -i $iface -d $ip -j DROP 2> /dev/null
@ -147,6 +146,7 @@ case $cmd in
for IFACE in $INTERFACES ; do
case $IFACE in
bond*)
IFACE=`echo $IFACE |sed -e 's/\....$//'`
grep '^MII Status: up' /proc/net/bonding/$IFACE > /dev/null || {
echo "ERROR: public network interface $IFACE is down"
exit 1

View File

@ -17,10 +17,81 @@ shift
SAMBA_CLEANUP_PERIOD=10
}
# we keep a cached copy of smb.conf here
smbconf_cache="$CTDB_BASE/state/samba/smb.conf.cache"
#############################################
# update the smb.conf cache in the foreground
testparm_foreground_update() {
    # Make sure the state directory exists; without it there is nowhere
    # to put the cache, so give up on the whole script.
    if ! mkdir -p "$CTDB_BASE/state/samba"; then
        exit 1
    fi

    # Run testparm synchronously and write its output to the cache file,
    # dropping any "registry shares =" and "include =" lines on the way.
    testparm -s 2> /dev/null |
        egrep -v 'registry.shares.=|include.=' > "$smbconf_cache"
}
#############################################
# update the smb.conf cache in the background
testparm_background_update() {
    # if the cache doesn't exist, then update in the foreground
    [ -f "$smbconf_cache" ] || {
        testparm_foreground_update
    }
    # then refresh the cache in a background subshell so that a slow or
    # hung testparm can not block the caller
    (
        tmpfile="${smbconf_cache}.$$"
        testparm -s > "$tmpfile" 2> /dev/null &
        # remember the pid of the testparm process
        pid="$!"
        # give it 10 seconds to run
        timeleft=10
        while [ $timeleft -gt 0 ]; do
            timeleft=$(($timeleft - 1))
            # see if the process still exists
            kill -0 $pid > /dev/null 2>&1 || {
                # it doesn't exist, grab its exit status.
                # Save it straight away: every later command (including
                # the test below) overwrites $?.
                wait $pid
                status=$?
                [ $status = 0 ] || {
                    echo "50.samba: smb.conf background update exited with status $status"
                    rm -f "${tmpfile}"
                    exit 1
                }
                # put the new smb.conf contents in the cache (atomic rename)
                # make sure we remove references to the registry while doing
                # this to ensure that running testparm on the cache does
                # not use the registry
                egrep -v 'registry.shares.=|include.=' < "$tmpfile" > "${tmpfile}.2"
                rm -f "$tmpfile"
                mv -f "${tmpfile}.2" "$smbconf_cache" || {
                    echo "50.samba: failed to update background cache"
                    rm -f "${tmpfile}.2"
                    exit 1
                }
                exit 0
            }
            # keep waiting for testparm to finish
            sleep 1
        done
        # it took more than 10 seconds - kill it off
        rm -f "${tmpfile}"
        kill -9 "$pid" > /dev/null 2>&1
        echo "50.samba: timed out updating smbconf cache in background"
        exit 1
    ) &
}
##################################################
# show the testparm output using a cached smb.conf
# to avoid registry access
testparm_cat() {
    # Populate the cache synchronously if it does not exist yet.
    # The path is quoted so that a $CTDB_BASE containing whitespace does
    # not get word-split inside the test.
    [ -f "$smbconf_cache" ] || {
        testparm_foreground_update
    }
    # Run testparm against the cached copy rather than the live
    # configuration; any extra arguments (e.g. --parameter-name=...) are
    # passed straight through.
    testparm -s "$smbconf_cache" "$@" 2>/dev/null
}
# function to see if ctdb manages winbind
check_ctdb_manages_winbind() {
[ -z "$CTDB_MANAGES_WINBIND" ] && {
secmode=`testparm -s --parameter-name=security 2> /dev/null`
secmode=`testparm_cat --parameter-name=security`
case $secmode in
ADS|DOMAIN)
CTDB_MANAGES_WINBIND="yes";
@ -108,18 +179,27 @@ case $cmd in
touch $CTDB_BASE/state/samba/periodic_cleanup
}
testparm -s 2>&1 | egrep '^WARNING|^ERROR|^Unknown' && {
echo "ERROR: testparm shows smb.conf is not clean"
exit 1
testparm_background_update
testparm_cat | egrep '^WARNING|^ERROR|^Unknown' && {
testparm_foreground_update
testparm_cat | egrep '^WARNING|^ERROR|^Unknown' && {
echo "ERROR: testparm shows smb.conf is not clean"
exit 1
}
}
[ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ] && {
echo do da test
smb_dirs=`testparm -s 2> /dev/null | egrep '^[[:space:]]*path = ' | cut -d= -f2`
ctdb_check_directories "Samba" $smb_dirs
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
ctdb_check_directories_probe "Samba" $smb_dirs || {
testparm_foreground_update
smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2`
ctdb_check_directories "Samba" $smb_dirs
}
smb_ports=`testparm -s --parameter-name="smb ports" 2> /dev/null`
smb_ports="$CTDB_SAMBA_CHECK_PORTS"
[ -z "$smb_ports" ] && {
smb_ports=`testparm_cat --parameter-name="smb ports"`
}
ctdb_check_tcp_ports "Samba" $smb_ports
# check winbind is OK

View File

@ -26,6 +26,9 @@ case $cmd in
iface=$1
ip=$2
shopt -s nullglob
# first send a grat arp, to ensure the client knows the updated
# mac address for this IP
ctdb gratiousarp $ip $iface
# send tickle acks for all the connections the old server had
for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do
cat $f | while read dest; do

View File

@ -143,6 +143,22 @@ ctdb_check_rpc() {
}
}
######################################################
# check whether a set of directories is available, without exiting
# returns 1 as soon as a directory is missing, 0 if all of them exist
# (or if no directories were given)
# usage: ctdb_check_directories_probe SERVICE_NAME <directories...>
######################################################
ctdb_check_directories_probe() {
service_name="$1"
shift
wait_dirs="$*"
# nothing to check: return with the (successful) status of the test
[ -z "$wait_dirs" ] && return;
# note: $d is not local, so on failure it is left set to the missing
# directory; ctdb_check_directories prints it in its error message
for d in $wait_dirs; do
[ -d $d ] || return 1
done
return 0
}
######################################################
# check a set of directories is available
# usage: ctdb_check_directories SERVICE_NAME <directories...>
@ -151,13 +167,10 @@ ctdb_check_directories() {
service_name="$1"
shift
wait_dirs="$*"
[ -z "$wait_dirs" ] && return;
for d in $wait_dirs; do
[ -d $d ] || {
echo "ERROR: $service_name directory $d not available"
exit 1
}
done
ctdb_check_directories_probe "$service_name" $wait_dirs || {
echo "ERROR: $service_name directory $d not available"
exit 1
}
}
######################################################
@ -211,16 +224,27 @@ kill_tcp_connections() {
_killcount=0
connfile="$CTDB_BASE/state/connections.$_IP"
netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile
netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile
while read dest src; do
srcip=`echo $src | cut -d: -f1`
srcport=`echo $src | cut -d: -f2`
destip=`echo $dest | cut -d: -f1`
destport=`echo $dest | cut -d: -f2`
ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
srcip=`echo $src | sed -e "s/:[^:]*$//"`
srcport=`echo $src | sed -e "s/^.*://"`
destip=`echo $dest | sed -e "s/:[^:]*$//"`
destport=`echo $dest | sed -e "s/^.*://"`
echo "Killing TCP connection $srcip:$srcport $destip:$destport"
ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1
case $destport in
# we only do one-way killtcp for NFS and CIFS
139|445|2049) : ;;
# for all others we do 2-way
*)
ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1
;;
esac
_killcount=`expr $_killcount + 1`
done < $connfile
done < $connfile
/bin/rm -f $connfile
[ $_failed = 0 ] || {
echo "Failed to send killtcp control"
return;
@ -331,3 +355,4 @@ startstop_nfslock() {
[ -x $CTDB_BASE/rc.local ] && {
. $CTDB_BASE/rc.local
}

View File

@ -106,6 +106,8 @@ struct ctdb_call_info {
/* send a broadcast to all connected nodes */
#define CTDB_BROADCAST_CONNECTED 0xF0000004
/* the key used for transaction locking on persistent databases */
#define CTDB_TRANSACTION_LOCK_KEY "__transaction_lock__"
enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT};
@ -546,4 +548,24 @@ struct ctdb_client_control_state *ctdb_ctrl_getcapabilities_send(struct ctdb_con
int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities);
struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
struct ctdb_marshall_buffer *m,
uint64_t db_id,
uint32_t reqid,
TDB_DATA key,
struct ctdb_ltdb_header *header,
TDB_DATA data);
TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m);
struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
TALLOC_CTX *mem_ctx);
int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
TALLOC_CTX *mem_ctx,
TDB_DATA key, TDB_DATA *data);
int ctdb_transaction_store(struct ctdb_transaction_handle *h,
TDB_DATA key, TDB_DATA data);
int ctdb_transaction_commit(struct ctdb_transaction_handle *h);
int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb);
#endif

View File

@ -61,8 +61,8 @@ typedef union {
a tcp connection description
*/
struct ctdb_tcp_connection {
struct sockaddr_in saddr;
struct sockaddr_in daddr;
ctdb_sock_addr src_addr;
ctdb_sock_addr dst_addr;
};
/* the wire representation for a tcp tickle array */
@ -73,7 +73,7 @@ struct ctdb_tcp_wire_array {
/* the list of tcp tickles used by get/set tcp tickle list */
struct ctdb_control_tcp_tickle_list {
struct sockaddr_in ip;
ctdb_sock_addr addr;
struct ctdb_tcp_wire_array tickles;
};
@ -114,6 +114,7 @@ struct ctdb_tunable {
uint32_t reclock_ping_period;
uint32_t no_ip_failback;
uint32_t verbose_memory_names;
uint32_t recd_ping_timeout;
};
/*
@ -161,6 +162,7 @@ struct ctdb_client {
uint32_t client_id;
pid_t pid;
struct ctdb_tcp_list *tcp_list;
uint32_t num_persistent_updates;
};
@ -169,7 +171,7 @@ struct ctdb_vnn {
struct ctdb_vnn *prev, *next;
const char *iface;
struct sockaddr_in public_address;
ctdb_sock_addr public_address;
uint8_t public_netmask_bits;
/* the node number that is serving this public address, if any.
@ -416,6 +418,7 @@ struct ctdb_context {
int start_as_disabled;
uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */
TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */
TALLOC_CTX *recd_ping_ctx;
};
struct ctdb_db_context {
@ -429,7 +432,6 @@ struct ctdb_db_context {
struct ctdb_registered_call *calls; /* list of registered calls */
uint32_t seqnum;
struct timed_event *te;
uint32_t client_tdb_flags;
};
@ -536,7 +538,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_START_RECOVERY = 70,
CTDB_CONTROL_END_RECOVERY = 71,
CTDB_CONTROL_RELOAD_NODES_FILE = 72,
CTDB_CONTROL_GET_RECLOCK_FILE = 73,
/* #73 removed */
CTDB_CONTROL_TRY_DELETE_RECORDS = 74,
CTDB_CONTROL_ENABLE_MONITOR = 75,
CTDB_CONTROL_DISABLE_MONITOR = 76,
@ -544,6 +546,13 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0,
CTDB_CONTROL_DEL_PUBLIC_IP = 78,
CTDB_CONTROL_RUN_EVENTSCRIPTS = 79,
CTDB_CONTROL_GET_CAPABILITIES = 80,
CTDB_CONTROL_START_PERSISTENT_UPDATE = 81,
CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE= 82,
CTDB_CONTROL_TRANS2_COMMIT = 83,
CTDB_CONTROL_TRANS2_FINISHED = 84,
CTDB_CONTROL_TRANS2_ERROR = 85,
CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86,
CTDB_CONTROL_RECD_PING = 87,
};
/*
@ -557,26 +566,27 @@ struct ctdb_control_set_call {
/*
struct for tcp_client control
used by samba, so its layout can not be modified
*/
struct ctdb_control_tcp {
struct sockaddr_in src;
struct sockaddr_in dest;
struct sockaddr_in src; // samba uses this
struct sockaddr_in dest;// samba uses this
};
/*
struct for kill_tcp control
*/
struct ctdb_control_killtcp {
struct sockaddr_in src;
struct sockaddr_in dst;
ctdb_sock_addr src_addr;
ctdb_sock_addr dst_addr;
};
/*
struct holding a sockaddr_in and an interface name,
struct holding a ctdb_sock_addr and an interface name,
used to add/remove public addresses
*/
struct ctdb_control_ip_iface {
struct sockaddr_in sin;
ctdb_sock_addr addr;
uint32_t mask;
uint32_t len;
char iface[1];
@ -597,8 +607,8 @@ struct ctdb_control_gratious_arp {
struct for tcp_add and tcp_remove controls
*/
struct ctdb_control_tcp_vnn {
struct sockaddr_in src;
struct sockaddr_in dest;
ctdb_sock_addr src;
ctdb_sock_addr dest;
};
/*
@ -784,17 +794,25 @@ struct ctdb_req_keepalive {
struct ctdb_req_header hdr;
};
/* types of failures possible from TRANS2_COMMIT

   One of these values is sent back as the status of the control reply:
   ctdb_persistent_callback() picks SUCCESS, SOMEFAIL or ALLFAIL by
   comparing the number of failed replies with the number of requests
   sent, and ctdb_persistent_store_timeout() replies with TIMEOUT.
*/
enum ctdb_trans2_commit_error {
CTDB_TRANS2_COMMIT_SUCCESS=0, /* all nodes committed successfully */
CTDB_TRANS2_COMMIT_TIMEOUT=1, /* at least one node timed out */
CTDB_TRANS2_COMMIT_ALLFAIL=2, /* all nodes failed the commit */
CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */
};
/* internal prototypes */
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2);
bool parse_ip_mask(const char *s, struct sockaddr_in *ip, unsigned *mask);
int ctdb_parse_address(struct ctdb_context *ctdb,
TALLOC_CTX *mem_ctx, const char *str,
struct ctdb_address *address);
bool ctdb_same_ipv4(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2);
bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2);
bool ctdb_same_sockaddr(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2);
bool ctdb_same_ip(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2);
bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2);
uint32_t ctdb_hash(const TDB_DATA *key);
uint32_t ctdb_hash_string(const char *str);
void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
@ -810,8 +828,12 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
TALLOC_CTX *mem_ctx, TDB_DATA *data);
int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
struct ctdb_ltdb_header *header, TDB_DATA data);
int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
struct ctdb_ltdb_header *header, TDB_DATA data);
int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA recdata);
int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA recdata);
void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode);
int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
@ -1019,8 +1041,8 @@ struct ctdb_control_pulldb {
uint32_t lmaster;
};
/* structure used for pulldb control */
struct ctdb_control_pulldb_reply {
/* structure used for sending lists of records */
struct ctdb_marshall_buffer {
uint32_t db_id;
uint32_t count;
uint8_t data[1];
@ -1065,8 +1087,7 @@ struct ctdb_control_list_tunable {
struct ctdb_node_and_flags {
uint32_t pnn;
uint32_t flags;
struct sockaddr_in sin;
ctdb_sock_addr addr;
};
struct ctdb_node_map {
@ -1089,7 +1110,7 @@ struct ctdb_client_call_state {
struct ctdb_call *call;
struct {
void (*fn)(struct ctdb_client_call_state *);
void *private;
void *private_data;
} async;
};
@ -1112,6 +1133,11 @@ int32_t ctdb_ltdb_set_seqnum_frequency(struct ctdb_context *ctdb, uint32_t frequ
struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
TDB_DATA key, struct ctdb_ltdb_header *, TDB_DATA data);
struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
uint32_t *reqid,
struct ctdb_ltdb_header *header,
TDB_DATA *key, TDB_DATA *data);
int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata);
@ -1166,7 +1192,7 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
struct ctdb_public_ip {
uint32_t pnn;
struct sockaddr_in sin;
ctdb_sock_addr addr;
};
int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
uint32_t destnode, struct ctdb_public_ip *ip);
@ -1185,7 +1211,7 @@ int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
/* from takeover/system.c */
int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface);
bool ctdb_sys_have_ip(struct sockaddr_in ip);
bool ctdb_sys_have_ip(ctdb_sock_addr *addr);
int ctdb_sys_send_tcp(const ctdb_sock_addr *dest,
const ctdb_sock_addr *src,
uint32_t seq, uint32_t ack, int rst);
@ -1226,7 +1252,6 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
TDB_DATA *outdata);
int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata);
int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata);
@ -1242,13 +1267,14 @@ int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
void ctdb_start_freeze(struct ctdb_context *ctdb);
bool parse_ip_port(const char *s, ctdb_sock_addr *saddr);
bool parse_ip(const char *s, ctdb_sock_addr *saddr);
bool parse_ip_mask(const char *s, ctdb_sock_addr *addr, unsigned *mask);
bool parse_ip_port(const char *s, ctdb_sock_addr *addr);
bool parse_ip(const char *s, ctdb_sock_addr *addr);
int ctdb_sys_open_capture_socket(const char *iface, void **private_data);
int ctdb_sys_close_capture_socket(void *private_data);
int ctdb_sys_read_tcp_packet(int s, void *private_data, struct sockaddr_in *src, struct sockaddr_in *dst,
uint32_t *ack_seq, uint32_t *seq);
int ctdb_sys_read_tcp_packet(int s, void *private_data, ctdb_sock_addr *src, ctdb_sock_addr *dst, uint32_t *ack_seq, uint32_t *seq);
int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
struct timeval timeout,
@ -1275,7 +1301,7 @@ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
TALLOC_CTX *mem_ctx,
struct sockaddr_in *ip,
ctdb_sock_addr *addr,
struct ctdb_control_tcp_tickle_list **list);
@ -1299,6 +1325,9 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
struct ctdb_req_control *c, TDB_DATA recdata,
bool *async_reply);
int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA recdata, bool *async_reply);
int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id);
int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id);
@ -1344,4 +1373,14 @@ int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode);
int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata);
int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb,
struct ctdb_req_control *c);
int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
struct ctdb_req_control *c);
char *ctdb_addr_to_str(ctdb_sock_addr *addr);
void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip);
int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb);
#endif

View File

@ -46,15 +46,15 @@ struct sigcounter {
the poor design of signals means that this table must be static global
*/
static struct sig_state {
struct signal_event *sig_handlers[NUM_SIGNALS];
struct sigaction *oldact[NUM_SIGNALS];
struct sigcounter signal_count[NUM_SIGNALS];
struct signal_event *sig_handlers[NUM_SIGNALS+1];
struct sigaction *oldact[NUM_SIGNALS+1];
struct sigcounter signal_count[NUM_SIGNALS+1];
struct sigcounter got_signal;
int pipe_hack[2];
#ifdef SA_SIGINFO
/* with SA_SIGINFO we get quite a lot of info per signal */
siginfo_t *sig_info[NUM_SIGNALS];
struct sigcounter sig_blocked[NUM_SIGNALS];
siginfo_t *sig_info[NUM_SIGNALS+1];
struct sigcounter sig_blocked[NUM_SIGNALS+1];
#endif
} *sig_state;

View File

@ -5,7 +5,7 @@ Vendor: Samba Team
Packager: Samba Team <samba@samba.org>
Name: ctdb
Version: 1.0
Release: 46
Release: 58
Epoch: 0
License: GNU GPL version 3
Group: System Environment/Daemons
@ -78,7 +78,7 @@ exit 0
%postun
if [ "$1" -ge "1" ]; then
%{initdir}/ctdb restart >/dev/null 2>&1
%{initdir}/ctdb restart >/dev/null 2>&1 || true
fi
@ -118,7 +118,69 @@ fi
%{_includedir}/ctdb_private.h
%changelog
* Fri Jul 11 2008 : Version pre_1.0.47
* Wed Aug 27 2008 : Version 1.0.58
- revert the name change tcp_tcp_client back to tcp_control_tcp so
samba can build.
- Updates to the init script from Abhijith Das <adas@redhat.com>
* Mon Aug 25 2008 : Version 1.0.57
- initial support for IPv6
* Mon Aug 11 2008 : Version 1.0.56
- fix a memory leak in the recovery daemon.
* Mon Aug 11 2008 : Version 1.0.55
- Fix the releaseip message we send to samba.
* Fri Aug 8 2008 : Version 1.0.54
- fix a looping error in the transaction code
- provide a more detailed error code for persistent store errors
so clients can make more intelligent choices on how to try to recover
* Thu Aug 7 2008 : Version 1.0.53
- Remove the reclock.pnn file it can cause gpfs to fail to umount
- New transaction code
* Mon Aug 4 2008 : Version 1.0.52
- Send an explicit gratious arp when starting sending the tcp tickles.
- When doing failover, issue a killtcp to non-NFS/non-CIFS clients
so that they fail quickly. NFS and CIFS already fail and recover
quickly.
- Update the test scripts to handle CTRL-C to kill off the test.
* Mon Jul 28 2008 : Version 1.0.51
- Strip off the vlan tag from bond devices before we check in /proc
if the interface is up or not.
- Use testparm in the background in the scripts to allow probing
that the shares do exist.
- Fix a bug in the logging code to handle multiline entries better
- Rename private elements from private to private_data
* Fri Jul 18 2008 : Version 1.0.50
- Dont assume that just because we can establish a TCP connection
that we are actually talking to a functioning ctdb daemon.
So dont mark the node as CONNECTED just because the tcp handshake
was successful.
- Dont try to set the recmaster to ourself during elections for those
cases we know this will fail. To remove some annoying benign but scary
looking entries from the log.
- Bugfix for eventsystem for signal handling that could cause a node to
hang.
* Thu Jul 17 2008 : Version 1.0.49
- Update the safe persistent update fix to work with unpatched samba
servers.
* Thu Jul 17 2008 : Version 1.0.48
- Update the spec file.
- Do not start new user-triggered eventscripts if we are already
inside recovery mode.
- Add two new controls to start/cancel a persistent update.
A client such as samba can use these to tell ctdbd that it will soon
be writing directly to the persistent database tdb file. So if
samba is -9ed before it has either done the persistent_store or
canceled the operation, ctdb knows that the persistent databases
'may' be out of sync and therefore a full blown recovery is called for.
- Add two new options :
CTDB_SAMBA_SKIP_CONF_CHECK and CTDB_SAMBA_CHECK_PORTS that can be used
to override what checks to do when monitoring samba health.
We can no longer use the smbstatus, net or testparm commands to check
if samba or its config is healthy since these commands may block
indefinitely and thus can not be used in scripts.
* Fri Jul 11 2008 : Version 1.0.47
- Fix a double free bug where if a user-triggered eventscript (ctdb eventscript)
hung and while the timeout handler was being processed a new user
triggered eventscript was started we would free state twice.
- Rewrite of onnode and associated documentation.
* Thu Jul 10 2008 : Version 1.0.46
- Document both the LVS:single-ip-address and the REMOTE-NODE:wan-accelerator

View File

@ -325,7 +325,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
return ctdb_control_kill_tcp(ctdb, indata);
case CTDB_CONTROL_GET_TCP_TICKLE_LIST:
CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr_in));
CHECK_CONTROL_DATA_SIZE(sizeof(ctdb_sock_addr));
return ctdb_control_get_tcp_tickle_list(ctdb, indata, outdata);
case CTDB_CONTROL_SET_TCP_TICKLE_LIST:
@ -378,10 +378,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_END_RECOVERY:
return ctdb_control_end_recovery(ctdb, c, async_reply);
case CTDB_CONTROL_GET_RECLOCK_FILE:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_get_reclock_file(ctdb, outdata);
case CTDB_CONTROL_TRY_DELETE_RECORDS:
return ctdb_control_try_delete_records(ctdb, indata, outdata);
@ -394,6 +390,26 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
case CTDB_CONTROL_GET_CAPABILITIES:
return ctdb_control_get_capabilities(ctdb, outdata);
case CTDB_CONTROL_START_PERSISTENT_UPDATE:
return ctdb_control_start_persistent_update(ctdb, c, indata);
case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE:
return ctdb_control_cancel_persistent_update(ctdb, c, indata);
case CTDB_CONTROL_TRANS2_COMMIT:
case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
return ctdb_control_trans2_commit(ctdb, c, indata, async_reply);
case CTDB_CONTROL_TRANS2_ERROR:
return ctdb_control_trans2_error(ctdb, c);
case CTDB_CONTROL_TRANS2_FINISHED:
return ctdb_control_trans2_finished(ctdb, c);
case CTDB_CONTROL_RECD_PING:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_recd_ping(ctdb);
default:
DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;

View File

@ -53,7 +53,7 @@ static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid,
ctdb->nodes[c->pnn]->flags =
(ctdb->nodes[c->pnn]->flags&NODE_FLAGS_DISCONNECTED)
| (c->new_flags & ~NODE_FLAGS_DISCONNECTED);
DEBUG(DEBUG_INFO,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags));
DEBUG(DEBUG_DEBUG,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags));
/* make sure we don't hold any IPs when we shouldn't */
if (c->pnn == ctdb->pnn &&
@ -103,6 +103,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb)
/* start periodic update of tcp tickle lists */
ctdb_start_tcp_tickle_update(ctdb);
/* start listening for recovery daemon pings */
ctdb_control_recd_ping(ctdb);
}
static void block_signal(int signum)
@ -210,6 +213,12 @@ static int ctdb_client_destructor(struct ctdb_client *client)
ctdb_takeover_client_destructor_hook(client);
ctdb_reqid_remove(client->ctdb, client->client_id);
client->ctdb->statistics.num_clients--;
if (client->num_persistent_updates != 0) {
DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
}
return 0;
}
@ -529,7 +538,7 @@ static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
struct sockaddr_in addr;
struct sockaddr_un addr;
socklen_t len;
int fd;
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);

View File

@ -138,38 +138,39 @@ static void ctdb_log_handler(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private)
{
struct ctdb_context *ctdb = talloc_get_type(private, struct ctdb_context);
int n1, n2;
char *p;
int n;
if (!(flags & EVENT_FD_READ)) {
return;
}
n1 = read(ctdb->log->pfd, &ctdb->log->buf[ctdb->log->buf_used],
n = read(ctdb->log->pfd, &ctdb->log->buf[ctdb->log->buf_used],
sizeof(ctdb->log->buf) - ctdb->log->buf_used);
if (n1 > 0) {
ctdb->log->buf_used += n1;
if (n > 0) {
ctdb->log->buf_used += n;
}
p = memchr(ctdb->log->buf, '\n', ctdb->log->buf_used);
if (!p) {
if (ctdb->log->buf_used == sizeof(ctdb->log->buf)) {
do_debug("%*.*s\n",
(int)ctdb->log->buf_used, (int)ctdb->log->buf_used, ctdb->log->buf);
ctdb->log->buf_used = 0;
while (ctdb->log->buf_used > 0 &&
(p = memchr(ctdb->log->buf, '\n', ctdb->log->buf_used)) != NULL) {
int n1 = (p - ctdb->log->buf)+1;
int n2 = n1 - 1;
/* swallow \r from child processes */
if (n2 > 0 && ctdb->log->buf[n2-1] == '\r') {
n2--;
}
return;
do_debug("%*.*s\n", n2, n2, ctdb->log->buf);
memmove(ctdb->log->buf, p+1, sizeof(ctdb->log->buf) - n1);
ctdb->log->buf_used -= n1;
}
n1 = (p - ctdb->log->buf)+1;
n2 = n1 - 1;
/* swallow \r from child processes */
if (n2 > 0 && ctdb->log->buf[n2-1] == '\r') {
n2--;
/* the buffer could have completely filled - unfortunately we have
no choice but to dump it out straight away */
if (ctdb->log->buf_used == sizeof(ctdb->log->buf)) {
do_debug("%*.*s\n",
(int)ctdb->log->buf_used, (int)ctdb->log->buf_used, ctdb->log->buf);
ctdb->log->buf_used = 0;
}
do_debug("%*.*s\n", n2, n2, ctdb->log->buf);
memmove(ctdb->log->buf, p+1, sizeof(ctdb->log->buf) - n1);
ctdb->log->buf_used -= n1;
}

View File

@ -330,7 +330,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
if (db) {
outdata->dptr = (uint8_t *)&db->db_id;
outdata->dsize = sizeof(db->db_id);
db->client_tdb_flags |= tdb_flags;
tdb_add_flags(db->ltdb->tdb, tdb_flags);
return 0;
}
@ -345,7 +345,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
}
/* remember the flags the client has specified */
db->client_tdb_flags = tdb_flags;
tdb_add_flags(db->ltdb->tdb, tdb_flags);
outdata->dptr = (uint8_t *)&db->db_id;
outdata->dsize = sizeof(db->db_id);

View File

@ -32,8 +32,16 @@ struct ctdb_persistent_state {
const char *errormsg;
uint32_t num_pending;
int32_t status;
uint32_t num_failed, num_sent;
};
/*
1) all nodes fail, and all nodes reply
2) some nodes fail, all nodes reply
3) some nodes timeout
4) all nodes succeed
*/
/*
called when a node has acknowledged a ctdb_control_update_record call
*/
@ -50,10 +58,19 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb,
status, errormsg));
state->status = status;
state->errormsg = errormsg;
state->num_failed++;
}
state->num_pending--;
if (state->num_pending == 0) {
ctdb_request_control_reply(state->ctdb, state->c, NULL, state->status, state->errormsg);
enum ctdb_trans2_commit_error etype;
if (state->num_failed == state->num_sent) {
etype = CTDB_TRANS2_COMMIT_ALLFAIL;
} else if (state->num_failed != 0) {
etype = CTDB_TRANS2_COMMIT_SOMEFAIL;
} else {
etype = CTDB_TRANS2_COMMIT_SUCCESS;
}
ctdb_request_control_reply(state->ctdb, state->c, NULL, etype, state->errormsg);
talloc_free(state);
}
}
@ -66,24 +83,58 @@ static void ctdb_persistent_store_timeout(struct event_context *ev, struct timed
{
struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state);
ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_state");
ctdb_request_control_reply(state->ctdb, state->c, NULL, CTDB_TRANS2_COMMIT_TIMEOUT,
"timeout in ctdb_persistent_state");
talloc_free(state);
}
/*
store a persistent record - called from a ctdb client when it has updated
a record in a persistent database. The client will have the record
store a set of persistent records - called from a ctdb client when it has updated
some records in a persistent database. The client will have the record
locked for the duration of this call. The client is the dmaster when
this call is made
*/
int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA recdata, bool *async_reply)
int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA recdata, bool *async_reply)
{
struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
struct ctdb_persistent_state *state;
int i;
if (client == NULL) {
DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store to a client. Returning error\n"));
return -1;
}
/* handling num_persistent_updates is a bit strange -
there are 3 cases
1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE
They don't expect num_persistent_updates to be used at all
2) less old clients, which use CTDB_CONTROL_START_PERSISTENT_UPDATE, and expect
this commit to then decrement it
3) new clients which use TRANS2 commit functions, and
expect this function to increment the counter, and
then have it decremented in ctdb_control_trans2_error
or ctdb_control_trans2_finished
*/
switch (c->opcode) {
case CTDB_CONTROL_PERSISTENT_STORE:
if (client->num_persistent_updates > 0) {
client->num_persistent_updates--;
}
break;
case CTDB_CONTROL_TRANS2_COMMIT:
client->num_persistent_updates++;
break;
case CTDB_CONTROL_TRANS2_COMMIT_RETRY:
/* already updated from the first commit */
break;
}
state = talloc_zero(ctdb, struct ctdb_persistent_state);
CTDB_NO_MEMORY(ctdb, state);
@ -114,6 +165,7 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
}
state->num_pending++;
state->num_sent++;
}
if (state->num_pending == 0) {
@ -138,10 +190,7 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
struct ctdb_persistent_write_state {
struct ctdb_db_context *ctdb_db;
TDB_DATA key;
TDB_DATA data;
struct ctdb_ltdb_header *header;
struct tdb_context *tdb;
struct ctdb_marshall_buffer *m;
struct ctdb_req_control *c;
};
@ -151,32 +200,73 @@ struct ctdb_persistent_write_state {
*/
static int ctdb_persistent_store(struct ctdb_persistent_write_state *state)
{
struct ctdb_ltdb_header oldheader;
int ret;
int ret, i;
struct ctdb_rec_data *rec = NULL;
struct ctdb_marshall_buffer *m = state->m;
/* fetch the old header and ensure the rsn is less than the new rsn */
ret = ctdb_ltdb_fetch(state->ctdb_db, state->key, &oldheader, NULL, NULL);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb);
if (ret == -1) {
DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
return -1;
}
if (oldheader.rsn >= state->header->rsn) {
DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
state->ctdb_db->db_id,
(unsigned long long)oldheader.rsn, (unsigned long long)state->header->rsn));
return -1;
for (i=0;i<m->count;i++) {
struct ctdb_ltdb_header oldheader;
struct ctdb_ltdb_header header;
TDB_DATA key, data, olddata;
TALLOC_CTX *tmp_ctx = talloc_new(state);
rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data);
if (rec == NULL) {
DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n",
i, state->ctdb_db->db_id));
talloc_free(tmp_ctx);
goto failed;
}
/* fetch the old header and ensure the rsn is less than the new rsn */
ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
talloc_free(tmp_ctx);
goto failed;
}
if (oldheader.rsn >= header.rsn &&
(olddata.dsize != data.dsize ||
memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) {
DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n",
state->ctdb_db->db_id,
(unsigned long long)oldheader.rsn, (unsigned long long)header.rsn));
talloc_free(tmp_ctx);
goto failed;
}
talloc_free(tmp_ctx);
ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data);
if (ret != 0) {
DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
return -1;
}
}
ret = ctdb_ltdb_persistent_store(state->ctdb_db, state->key, state->header, state->data);
if (ret != 0) {
DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb);
if (ret == -1) {
DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n",
state->ctdb_db->db_id));
return -1;
}
return 0;
failed:
tdb_transaction_cancel(state->ctdb_db->ltdb->tdb);
return -1;
}
@ -348,20 +438,19 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
struct ctdb_req_control *c, TDB_DATA recdata,
bool *async_reply)
{
struct ctdb_rec_data *rec = (struct ctdb_rec_data *)&recdata.dptr[0];
struct ctdb_db_context *ctdb_db;
uint32_t db_id = rec->reqid;
struct ctdb_persistent_write_state *state;
struct childwrite_handle *handle;
struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr;
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_update_record when recovery active\n"));
DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n"));
return -1;
}
ctdb_db = find_ctdb_db(ctdb, db_id);
ctdb_db = find_ctdb_db(ctdb, m->db_id);
if (ctdb_db == NULL) {
DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", db_id));
DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id));
return -1;
}
@ -370,23 +459,7 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
state->ctdb_db = ctdb_db;
state->c = c;
state->tdb = ctdb_db->ltdb->tdb;
state->key.dptr = &rec->data[0];
state->key.dsize = rec->keylen;
state->data.dptr = &rec->data[rec->keylen];
state->data.dsize = rec->datalen;
if (state->data.dsize < sizeof(struct ctdb_ltdb_header)) {
DEBUG(DEBUG_CRIT,("Invalid data size %u in ctdb_control_update_record\n",
(unsigned)state->data.dsize));
talloc_free(state);
return -1;
}
state->header = (struct ctdb_ltdb_header *)&state->data.dptr[0];
state->data.dptr += sizeof(struct ctdb_ltdb_header);
state->data.dsize -= sizeof(struct ctdb_ltdb_header);
state->m = m;
/* create a child process to take out a transaction and
write the data.
@ -410,3 +483,127 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb,
return 0;
}
/*
called when a client has finished a local commit in a transaction to
a persistent database
*/
int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb,
struct ctdb_req_control *c)
{
struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
if (client->num_persistent_updates == 0) {
DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n"));
DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n"));
client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
return -1;
}
client->num_persistent_updates--;
return 0;
}
/*
called when a client gets an error committing its database
during a transaction commit
*/
int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb,
struct ctdb_req_control *c)
{
struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
if (client->num_persistent_updates == 0) {
DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n"));
} else {
client->num_persistent_updates--;
}
DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n"));
client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
return 0;
}
/*
  backwards compatibility:

  an older client announces that it is about to store a persistent
  record. The announcement is counted on the client; the commit comment
  in this file says older clients expect the later store to decrement
  the counter again. The original note here states that a client
  disconnecting before that store triggers a full recovery so the
  databases are brought back in sync (that handling is not in this
  function).

  The key/header/data passed in recdata are ignored for now.
  Returns 0, or -1 if the request can not be matched to a client.
 */
int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb,
					     struct ctdb_req_control *c,
					     TDB_DATA recdata)
{
	struct ctdb_client *client;

	client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (client != NULL) {
		client->num_persistent_updates++;
		return 0;
	}

	DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n"));
	return -1;
}
/*
  backwards compatibility:

  an older client tells ctdbd that it is no longer doing a persistent
  update. The client's pending-update counter is decremented, but never
  below zero. recdata is not used.

  Returns 0, or -1 if the request can not be matched to a client.
 */
int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb,
					      struct ctdb_req_control *c,
					      TDB_DATA recdata)
{
	struct ctdb_client *client;

	client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n"));
		return -1;
	}

	/* a counter that is already zero is left alone */
	if (client->num_persistent_updates != 0) {
		client->num_persistent_updates--;
	}

	return 0;
}
/*
  backwards compatibility:

  single record variant of ctdb_control_trans2_commit for older clients

  recdata is expected to hold exactly one struct ctdb_rec_data (fixed
  header followed by keylen bytes of key and datalen bytes of data).
  The record is re-marshalled into a one-record marshall buffer and
  handed to ctdb_control_trans2_commit(), whose result is returned.
  Returns -1 when recdata does not have the expected size.
 */
int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb,
				      struct ctdb_req_control *c,
				      TDB_DATA recdata, bool *async_reply)
{
	struct ctdb_marshall_buffer *m;
	struct ctdb_rec_data *rec = (struct ctdb_rec_data *)recdata.dptr;
	TDB_DATA key, data;

	/* the fixed header must be present before keylen/datalen may
	   be read out of it */
	if (recdata.dsize < offsetof(struct ctdb_rec_data, data)) {
		DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata\n"));
		return -1;
	}

	/* add up in 64 bits so that oversized keylen/datalen values can
	   not wrap around and accidentally match dsize */
	if ((uint64_t)recdata.dsize !=
	    (uint64_t)offsetof(struct ctdb_rec_data, data) +
	    rec->keylen + rec->datalen) {
		DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata\n"));
		return -1;
	}

	/* key and data sit back to back behind the header */
	key.dptr = &rec->data[0];
	key.dsize = rec->keylen;
	data.dptr = &rec->data[rec->keylen];
	data.dsize = rec->datalen;

	/* rec->reqid is passed both as db_id and as reqid of the new record */
	m = ctdb_marshall_add(c, NULL, rec->reqid, rec->reqid, key, NULL, data);
	CTDB_NO_MEMORY(ctdb, m);

	return ctdb_control_trans2_commit(ctdb, c, ctdb_marshall_finish(m), async_reply);
}

View File

@ -163,7 +163,10 @@ ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA ind
node_map = (struct ctdb_node_map *)outdata->dptr;
node_map->num = num_nodes;
for (i=0; i<num_nodes; i++) {
inet_aton(ctdb->nodes[i]->address.address, &node_map->nodes[i].sin.sin_addr);
if (parse_ip(ctdb->nodes[i]->address.address, &node_map->nodes[i].addr) == 0) {
DEBUG(DEBUG_ERR, (__location__ " Failed to parse %s into a sockaddr\n", ctdb->nodes[i]->address.address));
}
node_map->nodes[i].pnn = ctdb->nodes[i]->pnn;
node_map->nodes[i].flags = ctdb->nodes[i]->flags;
}
@ -219,7 +222,7 @@ ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode)
*/
struct pulldb_data {
struct ctdb_context *ctdb;
struct ctdb_control_pulldb_reply *pulldata;
struct ctdb_marshall_buffer *pulldata;
uint32_t len;
bool failed;
};
@ -258,7 +261,7 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
struct ctdb_control_pulldb *pull;
struct ctdb_db_context *ctdb_db;
struct pulldb_data params;
struct ctdb_control_pulldb_reply *reply;
struct ctdb_marshall_buffer *reply;
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_pull_db when not frozen\n"));
@ -273,14 +276,14 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
return -1;
}
reply = talloc_zero(outdata, struct ctdb_control_pulldb_reply);
reply = talloc_zero(outdata, struct ctdb_marshall_buffer);
CTDB_NO_MEMORY(ctdb, reply);
reply->db_id = pull->db_id;
params.ctdb = ctdb;
params.pulldata = reply;
params.len = offsetof(struct ctdb_control_pulldb_reply, data);
params.len = offsetof(struct ctdb_marshall_buffer, data);
params.failed = false;
if (ctdb_lock_all_databases_mark(ctdb) != 0) {
@ -308,7 +311,7 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT
*/
int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
{
struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr;
struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr;
struct ctdb_db_context *ctdb_db;
int i, ret;
struct ctdb_rec_data *rec;
@ -318,7 +321,7 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
return -1;
}
if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n"));
return -1;
}
@ -865,35 +868,19 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
return 0;
}
/*
  report the location of the reclock file

  outdata receives a copy of ctdb->recovery_lock_file, allocated with
  outdata as talloc parent; dsize includes the terminating NUL.
  Returns 0 once outdata has been filled in.
 */
int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	char *reclock = talloc_strdup(outdata, ctdb->recovery_lock_file);

	CTDB_NO_MEMORY(ctdb, reclock);

	outdata->dptr  = (uint8_t *)reclock;
	outdata->dsize = strlen(reclock)+1;

	return 0;
}
/*
try to delete all these records as part of the vacuuming process
and return the records we failed to delete
*/
int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr;
struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr;
struct ctdb_db_context *ctdb_db;
int i;
struct ctdb_rec_data *rec;
struct ctdb_control_pulldb_reply *records;
struct ctdb_marshall_buffer *records;
if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
DEBUG(DEBUG_ERR,(__location__ " invalid data in try_delete_records\n"));
return -1;
}
@ -910,9 +897,9 @@ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA inda
/* create a blob to send back the records we couldn't delete */
records = (struct ctdb_control_pulldb_reply *)
records = (struct ctdb_marshall_buffer *)
talloc_zero_size(outdata,
offsetof(struct ctdb_control_pulldb_reply, data));
offsetof(struct ctdb_marshall_buffer, data));
if (records == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
return -1;
@ -984,3 +971,41 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda
return 0;
}
/*
  timed event callback registered by ctdb_control_recd_ping(): runs when
  the timeout set up there expires, and shuts the daemon down.
  p is the struct ctdb_context that was registered with the event;
  ev, te and t are not used.
  Does not return - the process exits with status 0.
 */
static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
{
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n"));
/* stop the recovery daemon, keepalive and monitoring, then give up all ips */
ctdb_stop_recoverd(ctdb);
ctdb_stop_keepalive(ctdb);
ctdb_stop_monitoring(ctdb);
ctdb_release_all_ips(ctdb);
/* the methods table may not be set */
if (ctdb->methods != NULL) {
ctdb->methods->shutdown(ctdb);
}
/* run the "shutdown" event script before exiting */
ctdb_event_script(ctdb, "shutdown");
DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n"));
exit(0);
}
/* The recovery daemon pings us at regular intervals.
   If we haven't been pinged for a while we assume the recovery
   daemon is inoperable and we shut down.

   Each ping replaces ctdb->recd_ping_ctx with a fresh talloc context
   and, unless the recd_ping_timeout tunable is 0, hangs a new timed
   event for ctdb_recd_ping_timeout() off that context.
   NOTE(review): freeing the old context presumably also drops the
   timed event hanging off it - confirm against the talloc/event docs.
*/
int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
{
	talloc_free(ctdb->recd_ping_ctx);

	ctdb->recd_ping_ctx = talloc_new(ctdb);
	CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx);

	/* a timeout of 0 means no watchdog is armed */
	if (ctdb->tunable.recd_ping_timeout == 0) {
		return 0;
	}

	event_add_timed(ctdb->ev, ctdb->recd_ping_ctx,
			timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
			ctdb_recd_ping_timeout, ctdb);

	return 0;
}

View File

@ -41,7 +41,6 @@ struct ban_state {
*/
struct ctdb_recoverd {
struct ctdb_context *ctdb;
int rec_file_fd;
uint32_t recmaster;
uint32_t num_active;
uint32_t num_connected;
@ -533,7 +532,7 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
{
int ret;
TDB_DATA outdata;
struct ctdb_control_pulldb_reply *reply;
struct ctdb_marshall_buffer *reply;
struct ctdb_rec_data *rec;
int i;
TALLOC_CTX *tmp_ctx = talloc_new(recdb);
@ -546,9 +545,9 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode,
return -1;
}
reply = (struct ctdb_control_pulldb_reply *)outdata.dptr;
reply = (struct ctdb_marshall_buffer *)outdata.dptr;
if (outdata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
if (outdata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n"));
talloc_free(tmp_ctx);
return -1;
@ -764,7 +763,7 @@ struct vacuum_info {
struct ctdb_recoverd *rec;
uint32_t srcnode;
struct ctdb_db_context *ctdb_db;
struct ctdb_control_pulldb_reply *recs;
struct ctdb_marshall_buffer *recs;
struct ctdb_rec_data *r;
};
@ -775,7 +774,7 @@ static void vacuum_fetch_next(struct vacuum_info *v);
*/
static void vacuum_fetch_callback(struct ctdb_client_call_state *state)
{
struct vacuum_info *v = talloc_get_type(state->async.private, struct vacuum_info);
struct vacuum_info *v = talloc_get_type(state->async.private_data, struct vacuum_info);
talloc_free(state);
vacuum_fetch_next(v);
}
@ -841,7 +840,7 @@ static void vacuum_fetch_next(struct vacuum_info *v)
return;
}
state->async.fn = vacuum_fetch_callback;
state->async.private = v;
state->async.private_data = v;
return;
}
@ -866,7 +865,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid,
TDB_DATA data, void *private_data)
{
struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd);
struct ctdb_control_pulldb_reply *recs;
struct ctdb_marshall_buffer *recs;
int ret, i;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
const char *name;
@ -877,7 +876,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid,
uint32_t srcnode;
struct vacuum_info *v;
recs = (struct ctdb_control_pulldb_reply *)data.dptr;
recs = (struct ctdb_marshall_buffer *)data.dptr;
r = (struct ctdb_rec_data *)&recs->data[0];
if (recs->count == 0) {
@ -1136,7 +1135,7 @@ static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_
*/
struct recdb_data {
struct ctdb_context *ctdb;
struct ctdb_control_pulldb_reply *recdata;
struct ctdb_marshall_buffer *recdata;
uint32_t len;
bool failed;
};
@ -1184,7 +1183,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
struct tdb_wrap *recdb, struct ctdb_node_map *nodemap)
{
struct recdb_data params;
struct ctdb_control_pulldb_reply *recdata;
struct ctdb_marshall_buffer *recdata;
TDB_DATA outdata;
TALLOC_CTX *tmp_ctx;
uint32_t *nodes;
@ -1192,14 +1191,14 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid,
tmp_ctx = talloc_new(ctdb);
CTDB_NO_MEMORY(ctdb, tmp_ctx);
recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply);
recdata = talloc_zero(recdb, struct ctdb_marshall_buffer);
CTDB_NO_MEMORY(ctdb, recdata);
recdata->db_id = dbid;
params.ctdb = ctdb;
params.recdata = recdata;
params.len = offsetof(struct ctdb_control_pulldb_reply, data);
params.len = offsetof(struct ctdb_marshall_buffer, data);
params.failed = false;
if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) {
@ -1654,7 +1653,7 @@ static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message
/*
send out an election request
*/
static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn)
static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn, bool update_recmaster)
{
int ret;
TDB_DATA election_data;
@ -1670,19 +1669,26 @@ static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn)
election_data.dptr = (unsigned char *)&emsg;
/* first we assume we will win the election and set
recoverymaster to be ourself on the current node
*/
ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), pnn, pnn);
if (ret != 0) {
DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request\n"));
return -1;
}
/* send an election message to all active nodes */
ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, srvid, election_data);
/* A new node that is already frozen has entered the cluster.
The existing nodes are not frozen and dont need to be frozen
until the election has ended and we start the actual recovery
*/
if (update_recmaster == true) {
/* first we assume we will win the election and set
recoverymaster to be ourself on the current node
*/
ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), pnn, pnn);
if (ret != 0) {
DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request\n"));
return -1;
}
}
return 0;
}
@ -1720,7 +1726,7 @@ static void election_send_request(struct event_context *ev, struct timed_event *
struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd);
int ret;
ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb));
ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb), false);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send election request!\n"));
}
@ -1856,7 +1862,7 @@ static void force_election(struct ctdb_recoverd *rec, uint32_t pnn,
timeval_current_ofs(ctdb->tunable.election_timeout, 0),
ctdb_election_timeout, rec);
ret = send_election_request(rec, pnn);
ret = send_election_request(rec, pnn, true);
if (ret!=0) {
DEBUG(DEBUG_ERR, (__location__ " failed to initiate recmaster election"));
return;
@ -2136,148 +2142,6 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct
return status;
}
/*
  write the number of nodes we are connected to (rec->num_connected,
  narrowed to a single byte) into the reclock pnn file, at the offset
  given by our own pnn.
  If the write fails the file is closed and rec->rec_file_fd reset to -1.
 */
static void
ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec)
{
	const char connected = rec->num_connected;
	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);

	if (rec->rec_file_fd == -1) {
		DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n"));
		return;
	}

	if (pwrite(rec->rec_file_fd, &connected, 1, ctdb->pnn) != -1) {
		return;
	}

	/* the write failed - give up on this descriptor */
	DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n"));
	close(rec->rec_file_fd);
	rec->rec_file_fd = -1;
}
/*
  (re)open the reclock pnn file ("<recovery_lock_file>.pnn") and take a
  one byte write lock in it; the lock starts at offset ctdb->pnn.
  The existence/non-existence of such a lock provides an alternative
  mechanism to know whether a remote node (recovery daemon) is running
  or not.

  On success the connected-count is written straight away; on any
  failure rec->rec_file_fd is left at -1.
 */
static void
ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec)
{
	struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context);
	struct flock lock;
	char *pnnfile = NULL;
	int got_lock = 0;

	DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn));

	/* drop a descriptor left over from an earlier call */
	if (rec->rec_file_fd != -1) {
		close(rec->rec_file_fd);
		rec->rec_file_fd = -1;
	}

	pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file);
	CTDB_NO_MEMORY_FATAL(ctdb, pnnfile);

	rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600);
	if (rec->rec_file_fd == -1) {
		DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n",
			 pnnfile, strerror(errno)));
	} else {
		set_close_on_exec(rec->rec_file_fd);

		/* one byte, starting at our pnn */
		lock.l_whence = SEEK_SET;
		lock.l_start = ctdb->pnn;
		lock.l_len = 1;
		lock.l_type = F_WRLCK;
		lock.l_pid = 0;

		if (fcntl(rec->rec_file_fd, F_SETLK, &lock) == 0) {
			got_lock = 1;
			DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile));
		} else {
			close(rec->rec_file_fd);
			rec->rec_file_fd = -1;
			DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile));
		}
	}

	talloc_free(pnnfile);

	if (got_lock) {
		/* we start out with 0 connected nodes */
		ctdb_recoverd_write_pnn_connect_count(rec);
	}
}
/*
  called when we need to do the periodic reclock pnn count update

  Timed event callback; p is the struct ctdb_recoverd, the other
  arguments are not used. Each run
   - reopens the pnn lock file and rewrites our connected-count,
   - re-arms itself after tunable.reclock_ping_period seconds,
   - if we believe we are the recmaster, checks for a split cluster
     and forces an election when the role should be yielded.
 */
static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *te,
				  struct timeval t, void *p)
{
	int i, count;
	struct ctdb_recoverd *rec     = talloc_get_type(p, struct ctdb_recoverd);
	struct ctdb_context *ctdb     = rec->ctdb;
	struct ctdb_node_map *nodemap = rec->nodemap;

	/* close and reopen the pnn lock file */
	ctdb_recoverd_get_pnn_lock(rec);

	ctdb_recoverd_write_pnn_connect_count(rec);

	/* re-arm before any of the early returns below */
	event_add_timed(rec->ctdb->ev, rec->ctdb,
			timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
			ctdb_update_pnn_count, rec);

	/* check if there is a split cluster and yield the recmaster role
	   if the other half of the cluster is larger
	*/
	DEBUG(DEBUG_DEBUG, ("CHECK FOR SPLIT CLUSTER\n"));
	if (rec->nodemap == NULL) {
		return;
	}
	if (rec->rec_file_fd == -1) {
		return;
	}
	/* only test this if we think we are the recmaster */
	if (ctdb->pnn != rec->recmaster) {
		DEBUG(DEBUG_DEBUG, ("We are not recmaster, skip test\n"));
		return;
	}
	if (ctdb->recovery_lock_fd == -1) {
		/* the descriptor is already -1 here, so there is nothing
		   to close or reset - just trigger the election */
		DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n"));
		force_election(rec, ctdb->pnn, rec->nodemap);
		return;
	}
	for (i=0; i<nodemap->num; i++) {
		/* we don't need to check ourself */
		if (nodemap->nodes[i].pnn == ctdb->pnn) {
			continue;
		}
		/* don't check nodes that are connected to us */
		if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
			continue;
		}
		/* check if the node is "connected" and how connected it is */
		count = ctdb_read_pnn_lock(rec->rec_file_fd, nodemap->nodes[i].pnn);
		if (count < 0) {
			continue;
		}
		/* check if that node is more connected than us */
		if (count > rec->num_connected) {
			DEBUG(DEBUG_ERR, ("DISCONNECTED Node %u is more connected than we are, yielding recmaster role\n", nodemap->nodes[i].pnn));
			close(ctdb->recovery_lock_fd);
			ctdb->recovery_lock_fd = -1;
			force_election(rec, ctdb->pnn, rec->nodemap);
			return;
		}
	}
}
/* called to check that the allocation of public ip addresses is ok.
*/
@ -2289,7 +2153,7 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn)
struct ctdb_uptime *uptime2 = NULL;
int ret, j;
ret = ctdb_ctrl_uptime(ctdb, ctdb, CONTROL_TIMEOUT(),
ret = ctdb_ctrl_uptime(ctdb, mem_ctx, CONTROL_TIMEOUT(),
CTDB_CURRENT_NODE, &uptime1);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get uptime from local node %u\n", pnn));
@ -2305,7 +2169,7 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn)
return -1;
}
ret = ctdb_ctrl_uptime(ctdb, ctdb, CONTROL_TIMEOUT(),
ret = ctdb_ctrl_uptime(ctdb, mem_ctx, CONTROL_TIMEOUT(),
CTDB_CURRENT_NODE, &uptime2);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get uptime from local node %u\n", pnn));
@ -2343,8 +2207,9 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn)
*/
for (j=0; j<ips->num; j++) {
if (ips->ips[j].pnn == pnn) {
if (!ctdb_sys_have_ip(ips->ips[j].sin)) {
DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
if (!ctdb_sys_have_ip(&ips->ips[j].addr)) {
DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n",
ctdb_addr_to_str(&ips->ips[j].addr)));
ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
@ -2361,8 +2226,10 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn)
}
}
} else {
if (ctdb_sys_have_ip(ips->ips[j].sin)) {
DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr)));
if (ctdb_sys_have_ip(&ips->ips[j].addr)) {
DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n",
ctdb_addr_to_str(&ips->ips[j].addr)));
ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n"));
@ -2412,10 +2279,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
rec->priority_time = timeval_current();
/* open the rec file fd and lock our slot */
rec->rec_file_fd = -1;
ctdb_recoverd_get_pnn_lock(rec);
/* register a message port for sending memory dumps */
ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec);
@ -2434,11 +2297,6 @@ static void monitor_cluster(struct ctdb_context *ctdb)
/* register a message port for vacuum fetch */
ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec);
/* update the reclock pnn file connected count on a regular basis */
event_add_timed(ctdb->ev, ctdb,
timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0),
ctdb_update_pnn_count, rec);
again:
if (mem_ctx) {
talloc_free(mem_ctx);
@ -2459,6 +2317,9 @@ again:
exit(-1);
}
/* ping the local daemon to tell it we are alive */
ctdb_ctrl_recd_ping(ctdb);
if (rec->election_timeout) {
/* an election is in progress */
goto again;
@ -2772,7 +2633,7 @@ again:
}
if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) !=
(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) {
DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
DEBUG(DEBUG_WARNING, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n",
nodemap->nodes[j].pnn, i,
remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags));
do_recovery(rec, mem_ctx, pnn, nodemap,
@ -2901,7 +2762,7 @@ again:
}
DEBUG(DEBUG_INFO, (__location__ " Update flags on all nodes\n"));
DEBUG(DEBUG_DEBUG, (__location__ " Update flags on all nodes\n"));
/*
update all nodes to have the same flags that we have
*/

View File

@ -56,7 +56,7 @@ struct ctdb_tcp_list {
struct ctdb_client_ip {
struct ctdb_client_ip *prev, *next;
struct ctdb_context *ctdb;
struct sockaddr_in ip;
ctdb_sock_addr addr;
uint32_t client_id;
};
@ -72,7 +72,6 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
int i, ret;
struct ctdb_tcp_array *tcparray;
ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface);
if (ret != 0) {
DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
@ -81,17 +80,20 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *
tcparray = arp->tcparray;
if (tcparray) {
for (i=0;i<tcparray->num;i++) {
struct ctdb_tcp_connection *tcon;
tcon = &tcparray->connections[i];
DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
(unsigned)ntohs(tcparray->connections[i].daddr.sin_port),
inet_ntoa(tcparray->connections[i].saddr.sin_addr),
(unsigned)ntohs(tcparray->connections[i].saddr.sin_port)));
(unsigned)ntohs(tcon->dst_addr.ip.sin_port),
ctdb_addr_to_str(&tcon->src_addr),
(unsigned)ntohs(tcon->src_addr.ip.sin_port)));
ret = ctdb_sys_send_tcp(
(ctdb_sock_addr *)&tcparray->connections[i].saddr,
(ctdb_sock_addr *)&tcparray->connections[i].daddr,
&tcon->src_addr,
&tcon->dst_addr,
0, 0, 0);
if (ret != 0) {
DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
inet_ntoa(tcparray->connections[i].saddr.sin_addr)));
ctdb_addr_to_str(&tcon->src_addr)));
}
}
}
@ -126,14 +128,9 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
struct ctdb_tcp_array *tcparray;
if (status != 0) {
char ip[128] = "";
if (inet_ntop(state->addr->sa.sa_family, &state->addr->sa.sa_data[0], ip, sizeof(ip)) == NULL) {
DEBUG(DEBUG_ERR, (__location__ " inet_ntop() failed\n"));
}
DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
ip, state->vnn->iface));
ctdb_addr_to_str(state->addr),
state->vnn->iface));
ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
talloc_free(state);
return;
@ -181,12 +178,12 @@ failed:
Find the vnn of the node that has a public ip address
returns -1 if the address is not known as a public address
*/
static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, struct sockaddr_in ip)
static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
{
struct ctdb_vnn *vnn;
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_ipv4(&vnn->public_address, &ip)) {
if (ctdb_same_ip(&vnn->public_address, addr)) {
return vnn;
}
}
@ -209,16 +206,16 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_vnn *vnn;
/* update our vnn list */
vnn = find_public_ip_vnn(ctdb, pip->sin);
vnn = find_public_ip_vnn(ctdb, &pip->addr);
if (vnn == NULL) {
DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
inet_ntoa(pip->sin.sin_addr)));
ctdb_addr_to_str(&pip->addr)));
return 0;
}
vnn->pnn = pip->pnn;
/* if our kernel already has this IP, do nothing */
if (ctdb_sys_have_ip(pip->sin)) {
if (ctdb_sys_have_ip(&pip->addr)) {
return 0;
}
@ -229,24 +226,26 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
state->addr = talloc(ctdb, ctdb_sock_addr);
CTDB_NO_MEMORY(ctdb, state->addr);
state->addr->ip = pip->sin; //qqq pip must be converted
state->vnn = vnn;
*state->addr = pip->addr;
state->vnn = vnn;
DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits,
vnn->iface));
ctdb_addr_to_str(&pip->addr),
vnn->public_netmask_bits,
vnn->iface));
ret = ctdb_event_script_callback(ctdb,
timeval_current_ofs(ctdb->tunable.script_timeout, 0),
state, takeover_ip_callback, state,
"takeip %s %s %u",
vnn->iface,
inet_ntoa(pip->sin.sin_addr),
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
vnn->public_netmask_bits);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
inet_ntoa(pip->sin.sin_addr), vnn->iface));
ctdb_addr_to_str(&pip->addr),
vnn->iface));
talloc_free(state);
return -1;
}
@ -263,27 +262,32 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
{
struct ctdb_client_ip *ip;
char cip[128] = "";
DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n", inet_ntop(addr->sa.sa_family, &addr->sa.sa_data[0], cip, sizeof(cip))));
DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
ctdb_addr_to_str(addr)));
for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
ctdb_sock_addr tmp_addr;
tmp_addr.ip = ip->ip; //qqq until ip->ip is no longer a sockaddr_in
tmp_addr = ip->addr;
DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n",
ip->client_id, inet_ntoa(ip->ip.sin_addr)));
ip->client_id,
ctdb_addr_to_str(&ip->addr)));
if (ctdb_same_ip(&tmp_addr, addr)) {
struct ctdb_client *client = ctdb_reqid_find(ctdb,
ip->client_id,
struct ctdb_client);
DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n",
ip->client_id, inet_ntoa(ip->ip.sin_addr), client->pid));
ip->client_id,
ctdb_addr_to_str(&ip->addr),
client->pid));
if (client->pid != 0) {
DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
(unsigned)client->pid,
inet_ntop(addr->sa.sa_family, &addr->sa.sa_data[0], cip, sizeof(cip)),
ip->client_id));
(unsigned)client->pid,
ctdb_addr_to_str(addr),
ip->client_id));
kill(client->pid, SIGKILL);
}
}
@ -298,18 +302,15 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status,
{
struct takeover_callback_state *state =
talloc_get_type(private_data, struct takeover_callback_state);
char ip[128] = "";
TDB_DATA data;
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
if (inet_ntop(state->addr->sa.sa_family, &state->addr->sa.sa_data[0], ip, sizeof(ip)) == NULL) {
DEBUG(DEBUG_ERR, (__location__ " inet_ntop() failed\n"));
}
data.dptr = (uint8_t *)ip;
data.dsize = strlen(ip)+1;
data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
data.dsize = strlen((char *)data.dptr)+1;
DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
@ -335,10 +336,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
struct ctdb_vnn *vnn;
/* update our vnn list */
vnn = find_public_ip_vnn(ctdb, pip->sin);
vnn = find_public_ip_vnn(ctdb, &pip->addr);
if (vnn == NULL) {
DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
inet_ntoa(pip->sin.sin_addr)));
DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n",
ctdb_addr_to_str(&pip->addr)));
return 0;
}
vnn->pnn = pip->pnn;
@ -347,16 +348,18 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
talloc_free(vnn->takeover_ctx);
vnn->takeover_ctx = NULL;
if (!ctdb_sys_have_ip(pip->sin)) {
if (!ctdb_sys_have_ip(&pip->addr)) {
DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n",
inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits,
vnn->iface));
ctdb_addr_to_str(&pip->addr),
vnn->public_netmask_bits,
vnn->iface));
return 0;
}
DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n",
inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits,
vnn->iface));
ctdb_addr_to_str(&pip->addr),
vnn->public_netmask_bits,
vnn->iface));
state = talloc(ctdb, struct takeover_callback_state);
CTDB_NO_MEMORY(ctdb, state);
@ -364,20 +367,20 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
state->c = talloc_steal(state, c);
state->addr = talloc(state, ctdb_sock_addr);
CTDB_NO_MEMORY(ctdb, state->addr);
state->addr->ip = pip->sin; //qqq pip must be converted
state->vnn = vnn;
*state->addr = pip->addr;
state->vnn = vnn;
ret = ctdb_event_script_callback(ctdb,
timeval_current_ofs(ctdb->tunable.script_timeout, 0),
state, release_ip_callback, state,
"releaseip %s %s %u",
vnn->iface,
inet_ntoa(pip->sin.sin_addr),
talloc_strdup(state, ctdb_addr_to_str(&pip->addr)),
vnn->public_netmask_bits);
if (ret != 0) {
DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
inet_ntoa(pip->sin.sin_addr), vnn->iface));
ctdb_addr_to_str(&pip->addr),
vnn->iface));
talloc_free(state);
return -1;
}
@ -389,15 +392,15 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
static int ctdb_add_public_address(struct ctdb_context *ctdb, struct sockaddr_in addr, unsigned mask, const char *iface)
static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface)
{
struct ctdb_vnn *vnn;
/* Verify that we don't have an entry for this ip yet */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_sockaddr(&addr, &vnn->public_address)) {
if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n",
inet_ntoa(addr.sin_addr)));
ctdb_addr_to_str(addr)));
return -1;
}
}
@ -406,7 +409,7 @@ static int ctdb_add_public_address(struct ctdb_context *ctdb, struct sockaddr_in
vnn = talloc_zero(ctdb, struct ctdb_vnn);
CTDB_NO_MEMORY_FATAL(ctdb, vnn);
vnn->iface = talloc_strdup(vnn, iface);
vnn->public_address = addr;
vnn->public_address = *addr;
vnn->public_netmask_bits = mask;
vnn->pnn = -1;
@ -446,7 +449,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
for (i=0;i<nlines;i++) {
unsigned mask;
struct sockaddr_in addr;
ctdb_sock_addr addr;
const char *iface;
char *tok;
@ -469,7 +472,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
iface = tok;
}
if (ctdb_add_public_address(ctdb, addr, mask, iface)) {
if (ctdb_add_public_address(ctdb, &addr, mask, iface)) {
DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
talloc_free(lines);
return -1;
@ -486,7 +489,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
struct ctdb_public_ip_list {
struct ctdb_public_ip_list *next;
uint32_t pnn;
struct sockaddr_in sin;
ctdb_sock_addr addr;
};
@ -524,7 +527,7 @@ static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn,
}
for (i=0;i<public_ips->num;i++) {
if (ip->sin.sin_addr.s_addr == public_ips->ips[i].sin.sin_addr.s_addr) {
if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
/* yes, this node can serve this public ip */
return 0;
}
@ -574,7 +577,9 @@ static int find_takeover_node(struct ctdb_context *ctdb,
}
}
if (pnn == -1) {
DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n", inet_ntoa(ip->sin.sin_addr)));
DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
ctdb_addr_to_str(&ip->addr)));
return -1;
}
@ -593,8 +598,8 @@ add_ip_to_merged_list(struct ctdb_context *ctdb,
/* do we already have this ip in our merged list ?*/
for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) {
/* we already have this public ip in the list */
if (tmp_ip->sin.sin_addr.s_addr == ip->sin.sin_addr.s_addr) {
/* we already have this public ip in the list */
if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) {
return ip_list;
}
}
@ -603,7 +608,7 @@ add_ip_to_merged_list(struct ctdb_context *ctdb,
tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list);
CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
tmp_ip->pnn = ip->pnn;
tmp_ip->sin = ip->sin;
tmp_ip->addr = ip->addr;
tmp_ip->next = ip_list;
return tmp_ip;
@ -729,7 +734,8 @@ try_again:
for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
if (tmp_ip->pnn == -1) {
if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n", inet_ntoa(tmp_ip->sin.sin_addr)));
DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
ctdb_addr_to_str(&tmp_ip->addr)));
}
}
}
@ -796,7 +802,9 @@ try_again:
}
}
if (maxnode == -1) {
DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr)));
DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
ctdb_addr_to_str(&tmp_ip->addr)));
continue;
}
@ -859,9 +867,8 @@ finished:
*/
continue;
}
ip.pnn = tmp_ip->pnn;
ip.sin.sin_family = AF_INET;
ip.sin.sin_addr = tmp_ip->sin.sin_addr;
ip.pnn = tmp_ip->pnn;
ip.addr = tmp_ip->addr;
timeout = TAKEOVER_TIMEOUT();
data.dsize = sizeof(ip);
@ -895,9 +902,8 @@ finished:
/* this IP won't be taken over */
continue;
}
ip.pnn = tmp_ip->pnn;
ip.sin.sin_family = AF_INET;
ip.sin.sin_addr = tmp_ip->sin.sin_addr;
ip.pnn = tmp_ip->pnn;
ip.addr = tmp_ip->addr;
timeout = TAKEOVER_TIMEOUT();
data.dsize = sizeof(ip);
@ -931,7 +937,10 @@ finished:
static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
{
/* log the address, port and client id of the record being destroyed */
/* NOTE(review): the port is read through the IPv4 member (addr.ip.sin_port) -
   confirm this is the intended field when the address is not AF_INET */
DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
inet_ntoa(ip->ip.sin_addr), ntohs(ip->ip.sin_port), ip->client_id));
ctdb_addr_to_str(&ip->addr),
ntohs(ip->addr.ip.sin_port),
ip->client_id));
/* unlink this record from the context's list of client IPs */
DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
return 0;
}
@ -940,6 +949,9 @@ static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
called by a client to inform us of a TCP connection that it is managing
that should be tickled with an ACK when IP takeover is done
*/
//qqq we need a new version of this control that takes ctdb_sock_addr
//and have samba move to that instead.
// This is IPV4 ONLY
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
TDB_DATA indata)
{
@ -951,20 +963,23 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
TDB_DATA data;
struct ctdb_client_ip *ip;
struct ctdb_vnn *vnn;
ctdb_sock_addr addr;
vnn = find_public_ip_vnn(ctdb, p->dest);
ZERO_STRUCT(addr);
addr.ip = p->dest;
vnn = find_public_ip_vnn(ctdb, &addr);
if (vnn == NULL) {
if (ntohl(p->dest.sin_addr.s_addr) != INADDR_LOOPBACK) {
DEBUG(DEBUG_INFO,("Could not add client IP %s. This is not a public address.\n",
inet_ntoa(p->dest.sin_addr)));
ctdb_addr_to_str((ctdb_sock_addr *)&p->dest)));
}
return 0;
}
if (vnn->pnn != ctdb->pnn) {
DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
inet_ntoa(p->dest.sin_addr),
client_id, client->pid));
ctdb_addr_to_str((ctdb_sock_addr *)&p->dest),
client_id, client->pid));
/* failing this call will tell smbd to die */
return -1;
}
@ -972,8 +987,8 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
ip = talloc(client, struct ctdb_client_ip);
CTDB_NO_MEMORY(ctdb, ip);
ip->ctdb = ctdb;
ip->ip = p->dest;
ip->ctdb = ctdb;
ip->addr.ip = p->dest;
ip->client_id = client_id;
talloc_set_destructor(ip, ctdb_client_ip_destructor);
DLIST_ADD(ctdb->client_ip_list, ip);
@ -981,21 +996,21 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
tcp = talloc(client, struct ctdb_tcp_list);
CTDB_NO_MEMORY(ctdb, tcp);
tcp->connection.saddr = p->src;
tcp->connection.daddr = p->dest;
tcp->connection.src_addr.ip = p->src;
tcp->connection.dst_addr.ip = p->dest;
DLIST_ADD(client->tcp_list, tcp);
t.src = p->src;
t.dest = p->dest;
t.src.ip = p->src;
t.dest.ip = p->dest;
data.dptr = (uint8_t *)&t;
data.dsize = sizeof(t);
DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
(unsigned)ntohs(p->dest.sin_port),
inet_ntoa(p->src.sin_addr),
(unsigned)ntohs(p->src.sin_port), client_id, client->pid));
(unsigned)ntohs(p->dest.sin_port),
ctdb_addr_to_str((ctdb_sock_addr *)&p->src),
(unsigned)ntohs(p->src.sin_port), client_id, client->pid));
/* tell all nodes about this tcp connection */
ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
@ -1009,16 +1024,6 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
return 0;
}
/*
see if two sockaddr_in are the same
*/
static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2)
{
return in1->sin_family == in2->sin_family &&
in1->sin_port == in2->sin_port &&
in1->sin_addr.s_addr == in2->sin_addr.s_addr;
}
/*
find a tcp address on a list
*/
@ -1032,8 +1037,8 @@ static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array,
}
for (i=0;i<array->num;i++) {
if (same_sockaddr_in(&array->connections[i].saddr, &tcp->saddr) &&
same_sockaddr_in(&array->connections[i].daddr, &tcp->daddr)) {
if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
return &array->connections[i];
}
}
@ -1052,10 +1057,11 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
struct ctdb_tcp_connection tcp;
struct ctdb_vnn *vnn;
vnn = find_public_ip_vnn(ctdb, p->dest);
vnn = find_public_ip_vnn(ctdb, &p->dest);
if (vnn == NULL) {
DEBUG(DEBUG_ERR,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
inet_ntoa(p->dest.sin_addr)));
DEBUG(DEBUG_ERR,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
ctdb_addr_to_str(&p->dest)));
return -1;
}
@ -1074,21 +1080,21 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
CTDB_NO_MEMORY(ctdb, tcparray->connections);
tcparray->connections[tcparray->num].saddr = p->src;
tcparray->connections[tcparray->num].daddr = p->dest;
tcparray->connections[tcparray->num].src_addr = p->src;
tcparray->connections[tcparray->num].dst_addr = p->dest;
tcparray->num++;
return 0;
}
/* Do we already have this tickle ?*/
tcp.saddr = p->src;
tcp.daddr = p->dest;
tcp.src_addr = p->src;
tcp.dst_addr = p->dest;
if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
inet_ntoa(tcp.daddr.sin_addr),
ntohs(tcp.daddr.sin_port),
vnn->pnn));
ctdb_addr_to_str(&tcp.dst_addr),
ntohs(tcp.dst_addr.ip.sin_port),
vnn->pnn));
return 0;
}
@ -1099,14 +1105,14 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
CTDB_NO_MEMORY(ctdb, tcparray->connections);
vnn->tcp_array = tcparray;
tcparray->connections[tcparray->num].saddr = p->src;
tcparray->connections[tcparray->num].daddr = p->dest;
tcparray->connections[tcparray->num].src_addr = p->src;
tcparray->connections[tcparray->num].dst_addr = p->dest;
tcparray->num++;
DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
inet_ntoa(tcp.daddr.sin_addr),
ntohs(tcp.daddr.sin_port),
vnn->pnn));
ctdb_addr_to_str(&tcp.dst_addr),
ntohs(tcp.dst_addr.ip.sin_port),
vnn->pnn));
return 0;
}
@ -1120,10 +1126,11 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
{
struct ctdb_tcp_connection *tcpp;
struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, conn->daddr);
struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
if (vnn == NULL) {
DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n", inet_ntoa(conn->daddr.sin_addr)));
DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
ctdb_addr_to_str(&conn->dst_addr)));
return;
}
@ -1132,8 +1139,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc
*/
if (vnn->tcp_array == NULL) {
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
inet_ntoa(conn->daddr.sin_addr),
ntohs(conn->daddr.sin_port)));
ctdb_addr_to_str(&conn->dst_addr),
ntohs(conn->dst_addr.ip.sin_port)));
return;
}
@ -1144,8 +1151,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc
tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
if (tcpp == NULL) {
DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
inet_ntoa(conn->daddr.sin_addr),
ntohs(conn->daddr.sin_port)));
ctdb_addr_to_str(&conn->dst_addr),
ntohs(conn->dst_addr.ip.sin_port)));
return;
}
@ -1169,8 +1176,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc
vnn->tcp_update_needed = true;
DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
inet_ntoa(conn->saddr.sin_addr),
ntohs(conn->saddr.sin_port)));
ctdb_addr_to_str(&conn->src_addr),
ntohs(conn->src_addr.ip.sin_port)));
}
@ -1207,15 +1214,14 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb)
struct ctdb_vnn *vnn;
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (!ctdb_sys_have_ip(vnn->public_address)) {
if (!ctdb_sys_have_ip(&vnn->public_address)) {
continue;
}
ctdb_event_script(ctdb, "releaseip %s %s %u",
vnn->iface,
inet_ntoa(vnn->public_address.sin_addr),
talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)),
vnn->public_netmask_bits);
// convert when vnn->public_address is no longer a sockaddr_in
release_kill_clients(ctdb, (ctdb_sock_addr *)&vnn->public_address);
release_kill_clients(ctdb, &vnn->public_address);
}
}
@ -1247,8 +1253,8 @@ int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb,
ips->num = num;
i = 0;
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
ips->ips[i].pnn = vnn->pnn;
ips->ips[i].sin = vnn->public_address;
ips->ips[i].pnn = vnn->pnn;
ips->ips[i].addr = vnn->public_address;
i++;
}
@ -1274,8 +1280,8 @@ struct ctdb_kill_tcp {
a tcp connection that is to be killed
*/
struct ctdb_killtcp_con {
struct sockaddr_in src;
struct sockaddr_in dst;
/* source and destination endpoints of the connection to be killed */
ctdb_sock_addr src_addr;
ctdb_sock_addr dst_addr;
/* set to 0 when the connection is added and incremented each time
   it is tickled again */
int count;
/* the kill-tcp state this connection was registered with */
struct ctdb_kill_tcp *killtcp;
};
@ -1285,15 +1291,41 @@ struct ctdb_killtcp_con {
this key is used to insert and lookup matching socketpairs that are
to be tickled and RST
*/
#define KILLTCP_KEYLEN 4
static uint32_t *killtcp_key(struct sockaddr_in *src, struct sockaddr_in *dst)
/* number of 32-bit words in a key: the AF_INET6 case below fills key[0..9] */
#define KILLTCP_KEYLEN 10
static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
{
/* static buffer: the returned pointer refers to storage that is
   overwritten by the next call */
static uint32_t key[KILLTCP_KEYLEN];
key[0] = dst->sin_addr.s_addr;
key[1] = src->sin_addr.s_addr;
key[2] = dst->sin_port;
key[3] = src->sin_port;
/* start from an all-zero key; the AF_INET case only fills key[0..3],
   so the remaining words stay zero */
bzero(key, sizeof(key));
/* mismatched address families: log and return the all-zero key */
if (src->sa.sa_family != dst->sa.sa_family) {
DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
return key;
}
switch (src->sa.sa_family) {
case AF_INET:
/* addresses first, then ports, destination before source */
key[0] = dst->ip.sin_addr.s_addr;
key[1] = src->ip.sin_addr.s_addr;
key[2] = dst->ip.sin_port;
key[3] = src->ip.sin_port;
break;
case AF_INET6:
/* the four 32-bit words of each address (word 3 down to word 0),
   destination before source, followed by the two ports */
key[0] = dst->ip6.sin6_addr.s6_addr32[3];
key[1] = src->ip6.sin6_addr.s6_addr32[3];
key[2] = dst->ip6.sin6_addr.s6_addr32[2];
key[3] = src->ip6.sin6_addr.s6_addr32[2];
key[4] = dst->ip6.sin6_addr.s6_addr32[1];
key[5] = src->ip6.sin6_addr.s6_addr32[1];
key[6] = dst->ip6.sin6_addr.s6_addr32[0];
key[7] = src->ip6.sin6_addr.s6_addr32[0];
key[8] = dst->ip6.sin6_port;
key[9] = src->ip6.sin6_port;
break;
default:
/* unknown family: log and return the all-zero key */
DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
return key;
}
return key;
}
@ -1306,7 +1338,7 @@ static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
{
struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
struct ctdb_killtcp_con *con;
struct sockaddr_in src, dst;
ctdb_sock_addr src, dst;
uint32_t ack_seq, seq;
if (!(flags & EVENT_FD_READ)) {
@ -1334,12 +1366,12 @@ static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde,
/* This one has been tickled !
now reset him and remove him from the list.
*/
DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n", ntohs(con->dst.sin_port), inet_ntoa(con->src.sin_addr), ntohs(con->src.sin_port)));
DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
ntohs(con->dst_addr.ip.sin_port),
ctdb_addr_to_str(&con->src_addr),
ntohs(con->src_addr.ip.sin_port)));
ctdb_sys_send_tcp(
(ctdb_sock_addr *)&con->dst,
(ctdb_sock_addr *)&con->src,
ack_seq, seq, 1);
ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
talloc_free(con);
}
@ -1362,8 +1394,8 @@ static void tickle_connection_traverse(void *param, void *data)
/* otherwise, try tickling it again */
con->count++;
ctdb_sys_send_tcp(
(ctdb_sock_addr *)&con->dst,
(ctdb_sock_addr *)&con->src,
(ctdb_sock_addr *)&con->dst_addr,
(ctdb_sock_addr *)&con->src_addr,
0, 0, 0);
}
@ -1422,20 +1454,25 @@ static void *add_killtcp_callback(void *parm, void *data)
add a tcp socket to the list of connections we want to RST
*/
static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
struct sockaddr_in *src, struct sockaddr_in *dst)
ctdb_sock_addr *s,
ctdb_sock_addr *d)
{
ctdb_sock_addr src, dst;
struct ctdb_kill_tcp *killtcp;
struct ctdb_killtcp_con *con;
struct ctdb_vnn *vnn;
vnn = find_public_ip_vnn(ctdb, *dst);
ctdb_canonicalize_ip(s, &src);
ctdb_canonicalize_ip(d, &dst);
vnn = find_public_ip_vnn(ctdb, &dst);
if (vnn == NULL) {
vnn = find_public_ip_vnn(ctdb, *src);
vnn = find_public_ip_vnn(ctdb, &src);
}
if (vnn == NULL) {
/* if it is not a public ip it could be our 'single ip' */
if (ctdb->single_ip_vnn) {
if (ctdb_same_ipv4(&ctdb->single_ip_vnn->public_address, dst)) {
if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
vnn = ctdb->single_ip_vnn;
}
}
@ -1470,14 +1507,14 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
*/
con = talloc(killtcp, struct ctdb_killtcp_con);
CTDB_NO_MEMORY(ctdb, con);
con->src = *src;
con->dst = *dst;
con->count = 0;
con->killtcp = killtcp;
con->src_addr = src;
con->dst_addr = dst;
con->count = 0;
con->killtcp = killtcp;
trbt_insertarray32_callback(killtcp->connections,
KILLTCP_KEYLEN, killtcp_key(&con->dst, &con->src),
KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
add_killtcp_callback, con);
/*
@ -1506,8 +1543,8 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb,
/* tickle him once now */
ctdb_sys_send_tcp(
(ctdb_sock_addr *)&con->dst,
(ctdb_sock_addr *)&con->src,
&con->dst_addr,
&con->src_addr,
0, 0, 0);
return 0;
@ -1525,7 +1562,7 @@ int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
{
struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst);
return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
}
/*
@ -1558,10 +1595,11 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
return -1;
}
vnn = find_public_ip_vnn(ctdb, list->ip);
vnn = find_public_ip_vnn(ctdb, &list->addr);
if (vnn == NULL) {
DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n",
inet_ntoa(list->ip.sin_addr)));
ctdb_addr_to_str(&list->addr)));
return 1;
}
@ -1592,16 +1630,17 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
*/
int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
struct sockaddr_in *ip = (struct sockaddr_in *)indata.dptr;
ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
struct ctdb_control_tcp_tickle_list *list;
struct ctdb_tcp_array *tcparray;
int num;
struct ctdb_vnn *vnn;
vnn = find_public_ip_vnn(ctdb, *ip);
vnn = find_public_ip_vnn(ctdb, addr);
if (vnn == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n",
inet_ntoa(ip->sin_addr)));
ctdb_addr_to_str(addr)));
return 1;
}
@ -1620,7 +1659,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
CTDB_NO_MEMORY(ctdb, outdata->dptr);
list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
list->ip = *ip;
list->addr = *addr;
list->tickles.num = num;
if (num) {
memcpy(&list->tickles.connections[0], tcparray->connections,
@ -1636,7 +1675,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind
*/
static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
struct sockaddr_in *ip,
ctdb_sock_addr *addr,
struct ctdb_tcp_array *tcparray)
{
int ret, num;
@ -1656,7 +1695,7 @@ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb,
CTDB_NO_MEMORY(ctdb, data.dptr);
list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
list->ip = *ip;
list->addr = *addr;
list->tickles.num = num;
if (tcparray) {
memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
@ -1704,8 +1743,8 @@ static void ctdb_update_tcp_tickles(struct event_context *ev,
&vnn->public_address,
vnn->tcp_array);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
inet_ntoa(vnn->public_address.sin_addr)));
DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
ctdb_addr_to_str(&vnn->public_address)));
}
}
@ -1828,7 +1867,7 @@ int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA inda
return -1;
}
return ctdb_add_public_address(ctdb, pub->sin, pub->mask, &pub->iface[0]);
return ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
}
/*
@ -1864,7 +1903,7 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
/* walk over all public addresses until we find a match */
for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
if (ctdb_same_ipv4(&vnn->public_address, &pub->sin)) {
if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
TALLOC_CTX *mem_ctx = talloc_new(ctdb);
DLIST_REMOVE(ctdb->vnn, vnn);
@ -1874,7 +1913,7 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda
mem_ctx, delete_ip_callback, mem_ctx,
"releaseip %s %s %u",
vnn->iface,
inet_ntoa(vnn->public_address.sin_addr),
talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)),
vnn->public_netmask_bits);
talloc_free(vnn);
if (ret != 0) {

View File

@ -87,11 +87,19 @@ static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DAT
struct ctdb_rec_data *d;
struct ctdb_ltdb_header *hdr;
/* filter out non-authoritative and zero-length records */
hdr = (struct ctdb_ltdb_header *)data.dptr;
if (data.dsize <= sizeof(struct ctdb_ltdb_header) ||
hdr->dmaster != h->ctdb_db->ctdb->pnn) {
return 0;
if (h->ctdb_db->persistent == 0) {
/* filter out zero-length records */
if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
return 0;
}
/* filter out non-authoritative records */
if (hdr->dmaster != h->ctdb_db->ctdb->pnn) {
return 0;
}
}
d = ctdb_marshall_record(h, 0, key, NULL, data);
@ -174,6 +182,7 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
struct ctdb_traverse_all_handle {
struct ctdb_context *ctdb;
struct ctdb_db_context *ctdb_db;
uint32_t reqid;
ctdb_traverse_fn_t callback;
void *private_data;
@ -224,17 +233,19 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
int ret;
TDB_DATA data;
struct ctdb_traverse_all r;
uint32_t destination;
state = talloc(ctdb_db, struct ctdb_traverse_all_handle);
if (state == NULL) {
return NULL;
}
state->ctdb = ctdb;
state->reqid = ctdb_reqid_new(ctdb_db->ctdb, state);
state->callback = callback;
state->ctdb = ctdb;
state->ctdb_db = ctdb_db;
state->reqid = ctdb_reqid_new(ctdb_db->ctdb, state);
state->callback = callback;
state->private_data = private_data;
state->null_count = 0;
state->null_count = 0;
talloc_set_destructor(state, ctdb_traverse_all_destructor);
@ -245,10 +256,37 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_
data.dptr = (uint8_t *)&r;
data.dsize = sizeof(r);
/* tell all the nodes in the cluster to start sending records to this node */
ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
CTDB_CONTROL_TRAVERSE_ALL,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
if (ctdb_db->persistent == 0) {
/* normal database, traverse all nodes */
destination = CTDB_BROADCAST_VNNMAP;
} else {
int i;
/* persistent database, traverse one node, preferably
* the local one
*/
destination = ctdb->pnn;
/* check we are in the vnnmap */
for (i=0; i < ctdb->vnn_map->size; i++) {
if (ctdb->vnn_map->map[i] == ctdb->pnn) {
break;
}
}
/* if we are not in the vnn map we just pick the first
* node instead
*/
if (i == ctdb->vnn_map->size) {
destination = ctdb->vnn_map->map[0];
}
}
/* tell all the nodes in the cluster to start sending records to this
* node, or if it is a persistent database, just tell one node (the
* local node, or the first node in the vnn map if we are not in it)
*/
ret = ctdb_daemon_send_control(ctdb, destination, 0,
CTDB_CONTROL_TRAVERSE_ALL,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
if (ret != 0) {
talloc_free(state);
return NULL;
@ -371,8 +409,13 @@ int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB
if (key.dsize == 0 && data.dsize == 0) {
state->null_count++;
if (state->null_count != ctdb_get_num_active_nodes(ctdb)) {
return 0;
/* Persistent databases are only scanned on one node (the local
* node)
*/
if (state->ctdb_db->persistent == 0) {
if (state->null_count != ctdb_get_num_active_nodes(ctdb)) {
return 0;
}
}
}

View File

@ -50,6 +50,7 @@ static const struct {
{ "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) },
{ "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) },
{ "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) },
{ "RecdPingTimeout", 60, offsetof(struct ctdb_tunable, recd_ping_timeout) },
};
/*

View File

@ -271,13 +271,11 @@ int main(int argc, const char *argv[])
svnn->iface = talloc_strdup(svnn, options.public_interface);
CTDB_NO_MEMORY(ctdb, svnn->iface);
if (inet_aton(options.single_public_ip,
&svnn->public_address.sin_addr) == 0) {
if (parse_ip(options.single_public_ip,
&svnn->public_address) == 0) {
DEBUG(DEBUG_ALERT,("Invalid --single-public-ip argument : %s . This is not a valid ip address. Exiting.\n", options.single_public_ip));
exit(10);
}
svnn->public_address.sin_family = AF_INET;
svnn->public_address.sin_port = 0;
}
if (options.public_address_list) {

View File

@ -252,10 +252,15 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
void (*callback)(struct ctdb_context *, int, void *) = state->callback;
void *private_data = state->private_data;
struct ctdb_context *ctdb = state->ctdb;
char *options;
DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
if (!strcmp(state->options, "monitor")) {
options = talloc_strdup(ctdb, state->options);
CTDB_NO_MEMORY_VOID(ctdb, options);
talloc_free(state);
if (!strcmp(options, "monitor")) {
/* if it is a monitor event, we allow it to "hang" a few times
before we declare it a failure and ban ourself (and make
ourself unhealthy)
@ -271,7 +276,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
} else {
callback(ctdb, 0, private_data);
}
} else if (!strcmp(state->options, "startup")) {
} else if (!strcmp(options, "startup")) {
DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
callback(ctdb, -1, private_data);
} else {
@ -281,7 +286,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve
callback(ctdb, -1, private_data);
}
talloc_free(state);
talloc_free(options);
}
/*
@ -480,10 +485,15 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state);
CTDB_NO_MEMORY(ctdb, state);
state->c = talloc_steal(ctdb, c);
state->c = talloc_steal(state, c);
DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
return -1;
}
ctdb_disable_monitoring(ctdb);
ret = ctdb_event_script_callback(ctdb,

View File

@ -100,23 +100,15 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f
/* the queue subsystem now owns this fd */
tnode->fd = -1;
/* tell the ctdb layer we are connected */
node->ctdb->upcalls->node_connected(node);
}
static int ctdb_tcp_get_address(struct ctdb_context *ctdb,
const char *address, struct in_addr *addr)
const char *address, ctdb_sock_addr *addr)
{
/* Convert the textual address into *addr.
   Returns 0 on success and -1 if the address cannot be converted. */
if (inet_pton(AF_INET, address, addr) <= 0) {
struct hostent *he = gethostbyname(address);
if (he == NULL || he->h_length > sizeof(*addr)) {
ctdb_set_error(ctdb, "invalid nework address '%s'\n",
address);
return -1;
}
memcpy(addr, he->h_addr, he->h_length);
/* a zero return from parse_ip() is treated as a parse failure */
if (parse_ip(address, addr) == 0) {
DEBUG(DEBUG_CRIT, (__location__ " Unparsable address : %s.\n", address));
return -1;
}
return 0;
}
@ -132,26 +124,34 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data,
struct ctdb_tcp_node);
struct ctdb_context *ctdb = node->ctdb;
struct sockaddr_in sock_in;
struct sockaddr_in sock_out;
ctdb_sock_addr sock_in;
ctdb_sock_addr sock_out;
ctdb_tcp_stop_connection(node);
tnode->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
set_nonblocking(tnode->fd);
set_close_on_exec(tnode->fd);
ZERO_STRUCT(sock_out);
#ifdef HAVE_SOCK_SIN_LEN
sock_out.sin_len = sizeof(sock_out);
sock_out.ip.sin_len = sizeof(sock_out);
#endif
if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out.sin_addr) != 0) {
if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out) != 0) {
return;
}
switch (sock_out.sa.sa_family) {
case AF_INET:
sock_out.ip.sin_port = htons(node->address.port);
break;
case AF_INET6:
sock_out.ip6.sin6_port = htons(node->address.port);
break;
default:
DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
sock_out.sa.sa_family));
return;
}
sock_out.sin_port = htons(node->address.port);
sock_out.sin_family = PF_INET;
tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
set_nonblocking(tnode->fd);
set_close_on_exec(tnode->fd);
/* Bind our side of the socketpair to the same address we use to listen
* on incoming CTDB traffic.
@ -161,13 +161,11 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
*/
ZERO_STRUCT(sock_in);
#ifdef HAVE_SOCK_SIN_LEN
sock_in.sin_len = sizeof(sock_in);
sock_in.ip.sin_len = sizeof(sock_in);
#endif
if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in.sin_addr) != 0) {
if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in) != 0) {
return;
}
sock_in.sin_port = htons(0); /* INPORT_ANY is not always available */
sock_in.sin_family = PF_INET;
bind(tnode->fd, (struct sockaddr *)&sock_in, sizeof(sock_in));
if (connect(tnode->fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) != 0 &&
@ -201,7 +199,7 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde,
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp);
struct sockaddr_in addr;
ctdb_sock_addr addr;
socklen_t len;
int fd, nodeid;
struct ctdb_incoming *in;
@ -213,7 +211,7 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde,
fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len);
if (fd == -1) return;
incoming_node = inet_ntoa(addr.sin_addr);
incoming_node = ctdb_addr_to_str(&addr);
nodeid = ctdb_ip_to_nodeid(ctdb, incoming_node);
if (nodeid == -1) {
@ -243,10 +241,11 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb)
{
struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data,
struct ctdb_tcp);
struct sockaddr_in sock;
ctdb_sock_addr sock;
int lock_fd, i;
const char *lock_path = "/tmp/.ctdb_socket_lock";
struct flock lock;
int one = 1;
/* in order to ensure that we don't get two nodes with the
same address, we must make the bind() and listen() calls
@ -282,16 +281,37 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb)
ZERO_STRUCT(sock);
#ifdef HAVE_SOCK_SIN_LEN
sock.sin_len = sizeof(sock);
sock.ip.sin_len = sizeof(sock);
#endif
sock.sin_port = htons(ctdb->nodes[i]->address.port);
sock.sin_family = PF_INET;
if (ctdb_tcp_get_address(ctdb,
ctdb->nodes[i]->address.address,
&sock.sin_addr) != 0) {
&sock) != 0) {
continue;
}
switch (sock.sa.sa_family) {
case AF_INET:
sock.ip.sin_port = htons(ctdb->nodes[i]->address.port);
break;
case AF_INET6:
sock.ip6.sin6_port = htons(ctdb->nodes[i]->address.port);
break;
default:
DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
sock.sa.sa_family));
continue;
}
ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
if (ctcp->listen_fd == -1) {
ctdb_set_error(ctdb, "socket failed\n");
continue;
}
set_close_on_exec(ctcp->listen_fd);
setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one));
if (bind(ctcp->listen_fd, (struct sockaddr * )&sock,
sizeof(sock)) == 0) {
break;
@ -344,19 +364,9 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb)
{
struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data,
struct ctdb_tcp);
struct sockaddr_in sock;
ctdb_sock_addr sock;
int one = 1;
ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
if (ctcp->listen_fd == -1) {
ctdb_set_error(ctdb, "socket failed\n");
return -1;
}
set_close_on_exec(ctcp->listen_fd);
setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one));
/* we can either auto-bind to the first available address, or we can
use a specified address */
if (!ctdb->address.address) {
@ -365,16 +375,36 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb)
ZERO_STRUCT(sock);
#ifdef HAVE_SOCK_SIN_LEN
sock.sin_len = sizeof(sock);
sock.ip.sin_len = sizeof(sock);
#endif
sock.sin_port = htons(ctdb->address.port);
sock.sin_family = PF_INET;
if (ctdb_tcp_get_address(ctdb, ctdb->address.address,
&sock.sin_addr) != 0) {
&sock) != 0) {
goto failed;
}
switch (sock.sa.sa_family) {
case AF_INET:
sock.ip.sin_port = htons(ctdb->address.port);
break;
case AF_INET6:
sock.ip6.sin6_port = htons(ctdb->address.port);
break;
default:
DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n",
sock.sa.sa_family));
goto failed;
}
ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP);
if (ctcp->listen_fd == -1) {
ctdb_set_error(ctdb, "socket failed\n");
return -1;
}
set_close_on_exec(ctcp->listen_fd);
setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one));
if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) != 0) {
goto failed;
}
@ -389,7 +419,9 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb)
return 0;
failed:
close(ctcp->listen_fd);
if (ctcp->listen_fd != -1) {
close(ctcp->listen_fd);
}
ctcp->listen_fd = -1;
return -1;
}

View File

@ -57,7 +57,7 @@ static void each_second(struct event_context *ev, struct timed_event *te,
uint32_t *old_counters;
printf("Counters: ");
printf("[%4u] Counters: ", getpid());
old_counters = (uint32_t *)old_data.dptr;
for (i=0;i<old_data.dsize/sizeof(uint32_t); i++) {
printf("%6u ", old_counters[i]);
@ -78,7 +78,8 @@ static void check_counters(struct ctdb_context *ctdb, TDB_DATA data)
/* check that all the counters are monotonic increasing */
for (i=0; i<old_data.dsize/sizeof(uint32_t); i++) {
if (counters[i]<old_counters[i]) {
printf("ERROR: counters has decreased for node %u From %u to %u\n", i, old_counters[i], counters[i]);
printf("[%4u] ERROR: counters has decreased for node %u From %u to %u\n",
getpid(), i, old_counters[i], counters[i]);
success = false;
}
}
@ -95,13 +96,9 @@ static void check_counters(struct ctdb_context *ctdb, TDB_DATA data)
static void test_store_records(struct ctdb_context *ctdb, struct event_context *ev)
{
TDB_DATA key, data;
TDB_DATA key;
struct ctdb_db_context *ctdb_db;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
int ret;
struct ctdb_record_handle *h;
uint32_t *counters;
int first_time = true;
ctdb_db = ctdb_db_handle(ctdb, "persistent.tdb");
key.dptr = discard_const("testkey");
@ -109,6 +106,12 @@ static void test_store_records(struct ctdb_context *ctdb, struct event_context *
start_timer();
while (end_timer() < timelimit) {
TDB_DATA data;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_record_handle *h;
int ret;
uint32_t *counters;
h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
if (h == NULL) {
printf("Failed to fetch record '%s' on node %d\n",
@ -135,11 +138,6 @@ static void test_store_records(struct ctdb_context *ctdb, struct event_context *
counters = (uint32_t *)data.dptr;
if (first_time) {
counters[pnn] = 0;
first_time = false;
}
/* bump our counter */
counters[pnn]++;
@ -155,9 +153,9 @@ static void test_store_records(struct ctdb_context *ctdb, struct event_context *
}
talloc_free(h);
talloc_free(tmp_ctx);
}
talloc_free(tmp_ctx);
}
/*
@ -181,6 +179,8 @@ int main(int argc, const char *argv[])
poptContext pc;
struct event_context *ev;
setlinebuf(stdout);
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
while ((opt = poptGetNextOpt(pc)) != -1) {

View File

@ -0,0 +1,258 @@
/*
simple tool to test persistent databases
Copyright (C) Andrew Tridgell 2006-2007
Copyright (c) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "cmdline.h"
#include <sys/time.h>
#include <time.h>
/* wall-clock samples: tp1 is taken by start_timer(), tp2 by end_timer() */
static struct timeval tp1,tp2;

/*
  remember the current time as the start of the measured interval
*/
static void start_timer(void)
{
	gettimeofday(&tp1, NULL);
}

/*
  sample the current time and return the number of seconds (with a
  microsecond fraction) that have passed since start_timer() was called
*/
static double end_timer(void)
{
	double started;
	double finished;

	gettimeofday(&tp2, NULL);

	finished = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
	started  = tp1.tv_sec + (tp1.tv_usec*1.0e-6);

	return finished - started;
}
/* how long test_store_records() keeps looping, compared against end_timer()
   (seconds); defaults to 10 and can be changed with --timelimit */
static int timelimit = 10;
/* node number of this client, filled in from ctdb_get_pnn() in main(); also
   used as this client's index into the counters array */
static unsigned int pnn;
/* copy of the counters most recently handed to check_counters() */
static TDB_DATA old_data;
/* overall test verdict: starts out true, is cleared when a check fails and
   is reported by main() on node 0 */
static int success = true;
/*
  timed event callback: print the counters last saved in old_data, then
  re-arm itself so that it fires again one second later

  ev           - event context the timer is (re)registered on
  te, t        - unused
  private_data - the struct ctdb_context passed when the timer was added
*/
static void each_second(struct event_context *ev, struct timed_event *te,
			struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
	uint32_t *old_counters = (uint32_t *)old_data.dptr;
	size_t num_counters = old_data.dsize/sizeof(uint32_t);
	size_t i;

	/* getpid() returns a signed pid_t; cast it so the argument really
	   matches the %u conversion */
	printf("[%4u] Counters: ", (unsigned int)getpid());
	for (i=0; i<num_counters; i++) {
		printf("%6u ", old_counters[i]);
	}
	printf("\n");

	event_add_timed(ev, ctdb, timeval_current_ofs(1, 0), each_second, ctdb);
}
/*
  compare the counters in 'data' against the copy saved from the previous
  call and flag the test as failed if any counter went backwards, then save
  'data' as the new reference copy in old_data

  ctdb - talloc parent used for the saved copy
  data - record contents, treated as an array of uint32_t counters
*/
static void check_counters(struct ctdb_context *ctdb, TDB_DATA data)
{
	uint32_t *counters = (uint32_t *)data.dptr;
	uint32_t *old_counters = (uint32_t *)old_data.dptr;
	size_t num_old = old_data.dsize/sizeof(uint32_t);
	size_t num_new = data.dsize/sizeof(uint32_t);
	/* only indexes present in BOTH arrays may be compared; bounding the
	   loop by the old size alone would read past the end of a shorter
	   new record */
	size_t num_common = (num_new < num_old) ? num_new : num_old;
	size_t i;

	/* check that all the counters are monotonic increasing */
	for (i=0; i<num_common; i++) {
		if (counters[i]<old_counters[i]) {
			printf("[%4u] ERROR: counters has decreased for node %u From %u to %u\n",
			       (unsigned int)getpid(), (unsigned int)i,
			       old_counters[i], counters[i]);
			success = false;
		}
	}

	if (old_data.dsize != data.dsize) {
		/* resize through a temporary so the saved copy is not lost
		   when the allocation fails.
		   NOTE(review): talloc is expected to return NULL for a
		   zero-sized request as well, hence the dsize test - confirm */
		unsigned char *resized = talloc_realloc_size(ctdb, old_data.dptr, data.dsize);
		if (resized == NULL && data.dsize != 0) {
			printf("[%4u] ERROR: failed to resize saved counters to %u bytes\n",
			       (unsigned int)getpid(), (unsigned int)data.dsize);
			success = false;
			return;
		}
		old_data.dptr  = resized;
		old_data.dsize = data.dsize;
	}

	if (data.dsize != 0) {
		memcpy(old_data.dptr, data.dptr, data.dsize);
	}
}
static void test_store_records(struct ctdb_context *ctdb, struct event_context *ev)
{
TDB_DATA key;
struct ctdb_db_context *ctdb_db;
int ret;
uint32_t *counters;
ctdb_db = ctdb_db_handle(ctdb, "transaction.tdb");
key.dptr = discard_const("testkey");
key.dsize = strlen((const char *)key.dptr)+1;
start_timer();
while (end_timer() < timelimit) {
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
TDB_DATA data;
struct ctdb_transaction_handle *h;
h = ctdb_transaction_start(ctdb_db, tmp_ctx);
if (h == NULL) {
printf("Failed to start transaction on node %d\n",
ctdb_get_pnn(ctdb));
talloc_free(tmp_ctx);
return;
}
ret = ctdb_transaction_fetch(h, tmp_ctx, key, &data);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to fetch record\n"));
exit(1);
}
if (data.dsize < sizeof(uint32_t) * (pnn+1)) {
unsigned char *ptr = data.dptr;
data.dptr = talloc_zero_size(tmp_ctx, sizeof(uint32_t) * (pnn+1));
memcpy(data.dptr, ptr, data.dsize);
talloc_free(ptr);
data.dsize = sizeof(uint32_t) * (pnn+1);
}
if (data.dptr == NULL) {
printf("Failed to realloc array\n");
talloc_free(tmp_ctx);
return;
}
counters = (uint32_t *)data.dptr;
/* bump our counter */
counters[pnn]++;
ret = ctdb_transaction_store(h, key, data);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to store record\n"));
exit(1);
}
ret = ctdb_transaction_commit(h);
if (ret != 0) {
DEBUG(DEBUG_ERR,("Failed to commit transaction\n"));
exit(1);
}
/* store the counters and verify that they are sane */
if (pnn == 0) {
check_counters(ctdb, data);
}
talloc_free(tmp_ctx);
}
}
/*
main program
*/
/*
  entry point: parse the command line, connect to the daemon, attach to
  transaction.tdb, wait until the local node reports recovery mode 0 and
  then run the transaction store test. Node 0 also prints the counters
  once per second and reports the final verdict.
*/
int main(int argc, const char *argv[])
{
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;
	struct event_context *ev;
	poptContext pc;
	const char **extra_argv;
	int extra_argc = 0;
	int unsafe_writes = 0;
	int opt;

	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
		{ "unsafe-writes", 'u', POPT_ARG_NONE, &unsafe_writes, 0, "do not use tdb transactions when writing", NULL },
		POPT_TABLEEND
	};

	setlinebuf(stdout);

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	/* every value other than -1 is treated as a bad option and is fatal */
	opt = poptGetNextOpt(pc);
	if (opt != -1) {
		fprintf(stderr, "Invalid option %s: %s\n",
			poptBadOption(pc, 0), poptStrerror(opt));
		exit(1);
	}

	/* setup the remaining options for the main program to use */
	extra_argv = poptGetArgs(pc);
	if (extra_argv != NULL) {
		/* step over the first leftover argument, then count the rest */
		for (extra_argv++; extra_argv[extra_argc] != NULL; extra_argc++) {
			/* counting only */
		}
	}

	ev = event_context_init(NULL);

	ctdb = ctdb_cmdline_client(ev);
	if (ctdb == NULL) {
		printf("Could not attach to daemon\n");
		return 1;
	}

	/* attach to a specific database; --unsafe-writes selects TDB_NOSYNC */
	ctdb_db = ctdb_attach(ctdb, "transaction.tdb", true,
			      (unsafe_writes == 1) ? TDB_NOSYNC : 0);
	if (ctdb_db == NULL) {
		printf("ctdb_attach failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	printf("Waiting for cluster\n");
	for (;;) {
		/* starts out non-zero so a call that does not fill it in
		   keeps us waiting */
		uint32_t recmode = 1;

		ctdb_ctrl_getrecmode(ctdb, ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
		if (recmode == 0) {
			break;
		}
		event_loop_once(ev);
	}

	pnn = ctdb_get_pnn(ctdb);
	printf("Starting test on node %u. running for %u seconds\n", pnn, timelimit);

	/* only node 0 prints the periodic counter report */
	if (pnn == 0) {
		event_add_timed(ev, ctdb, timeval_current_ofs(1, 0), each_second, ctdb);
	}

	test_store_records(ctdb, ev);

	/* only node 0 verifies the counters, so only it reports a verdict */
	if (pnn == 0 && success != true) {
		printf("The test FAILED\n");
		return 1;
	}
	if (pnn == 0) {
		printf("SUCCESS!\n");
	}

	return 0;
}

View File

@ -27,7 +27,7 @@
#include <sys/time.h>
#include <time.h>
static char *dbname = "test.tdb";
static const char *dbname = "test.tdb";
static int traverse_callback(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *private_data)
{

View File

@ -5,6 +5,8 @@ if [ $# -gt 0 ]; then
NUMNODES=$1
fi
trap 'echo "Killing test"; killall -9 -q ctdbd ctdb_fetch; exit 1' INT TERM
tests/start_daemons.sh $NUMNODES || exit 1

View File

@ -1,4 +1 @@
127.0.0.1
127.0.0.2
127.0.0.3
127.0.0.4

View File

@ -5,14 +5,19 @@ if [ $# -gt 0 ]; then
NUMNODES=$1
fi
killall -9 -q ctdb_persistent ctdbd
rm -rf test.db/persistent
echo "Starting $NUMNODES daemons for SAFE persistent writes"
tests/start_daemons.sh $NUMNODES || exit 1
killall -9 -q ctdb_persistent
trap 'echo "Killing test"; killall -9 -q ctdbd ctdb_persistent; exit 1' INT TERM
for i in `seq 1 $NUMNODES`; do
$VALGRIND bin/ctdb_persistent --timelimit 30 --socket sock.$i $* &
$VALGRIND bin/ctdb_persistent --timelimit 30 --socket sock.$i $* &
done
wait
@ -29,6 +34,7 @@ killall -9 -q ctdb_persistent
for i in `seq 1 $NUMNODES`; do
$VALGRIND bin/ctdb_persistent --unsafe-writes --timelimit 30 --socket sock.$i $* &
$VALGRIND bin/ctdb_persistent --unsafe-writes --timelimit 30 --socket sock.$i $* &
done
wait

View File

@ -188,61 +188,61 @@ int main(int argc, const char *argv[])
trbt_insertarray32_callback(tree, 3, key1, callback, u32array[0]);
data = trbt_lookuparray32(tree, 3, key1);
printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key1 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key2);
printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key2 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key3);
printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key3 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key4);
printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key4 dataptr:%p == %d\n",data,data?*data:-1);
trbt_traversearray32(tree, 3, traverse, NULL);
printf("\ndeleting key4\n");
talloc_free(trbt_lookuparray32(tree, 3, key4));
data = trbt_lookuparray32(tree, 3, key1);
printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key1 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key2);
printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key2 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key3);
printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key3 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key4);
printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key4 dataptr:%p == %d\n",data,data?*data:-1);
trbt_traversearray32(tree, 3, traverse, NULL);
printf("\ndeleting key2\n");
talloc_free(trbt_lookuparray32(tree, 3, key2));
data = trbt_lookuparray32(tree, 3, key1);
printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key1 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key2);
printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key2 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key3);
printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key3 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key4);
printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key4 dataptr:%p == %d\n",data,data?*data:-1);
trbt_traversearray32(tree, 3, traverse, NULL);
printf("\ndeleting key3\n");
talloc_free(trbt_lookuparray32(tree, 3, key3));
data = trbt_lookuparray32(tree, 3, key1);
printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key1 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key2);
printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key2 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key3);
printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key3 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key4);
printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key4 dataptr:%p == %d\n",data,data?*data:-1);
trbt_traversearray32(tree, 3, traverse, NULL);
printf("\ndeleting key1\n");
talloc_free(trbt_lookuparray32(tree, 3, key1));
data = trbt_lookuparray32(tree, 3, key1);
printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key1 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key2);
printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key2 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key3);
printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key3 dataptr:%p == %d\n",data,data?*data:-1);
data = trbt_lookuparray32(tree, 3, key4);
printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1);
printf("key4 dataptr:%p == %d\n",data,data?*data:-1);
trbt_traversearray32(tree, 3, traverse, NULL);
talloc_free(tree);

View File

@ -1,5 +1,7 @@
#!/bin/sh
trap 'echo "Killing test"; killall -9 -q ctdbd; exit 1' INT TERM
tests/fetch.sh 4 || exit 1
tests/bench.sh 4 || exit 1
tests/ctdbd.sh || exit 1

View File

@ -4,12 +4,17 @@ NUMNODES=2
if [ $# -gt 0 ]; then
NUMNODES=$1
fi
NODES="./tests/nodes.txt"
shift
NODES="./tests/nodes.txt"
rm -f $NODES
for i in `seq 1 $NUMNODES`; do
echo 127.0.0.$i >> $NODES
if [ "${CTDB_USE_IPV6}x" != "x" ]; then
echo ::$i >> $NODES
ip addr add ::$i/128 dev lo
else
echo 127.0.0.$i >> $NODES
fi
done
killall -q ctdbd

28
ctdb/tests/transaction.sh Executable file
View File

@ -0,0 +1,28 @@
#!/bin/sh

# Run the ctdb_transaction test against a set of local daemons.
# usage: tests/transaction.sh [NUMNODES [extra ctdb_transaction options...]]

NUMNODES=4
if [ $# -gt 0 ]; then
    NUMNODES=$1
    # drop the node count so that only the extra client options remain
    # in "$@" and it is not forwarded to every client as a stray argument
    shift
fi

# start from a clean state: no stale processes, no old database
killall -9 -q ctdb_transaction ctdbd
rm -rf test.db/transaction

echo "Starting $NUMNODES daemons for transaction writes"
tests/start_daemons.sh $NUMNODES || exit 1

trap 'echo "Killing test"; killall -9 -q ctdbd ctdb_transaction; exit 1' INT TERM

VALGRIND="valgrind -q"
# two clients are started against every node's socket; "$@" forwards the
# extra options without re-splitting them
for i in `seq 1 $NUMNODES`; do
    $VALGRIND bin/ctdb_transaction --timelimit 30 --socket sock.$i "$@" &
    $VALGRIND bin/ctdb_transaction --timelimit 30 --socket sock.$i "$@" &
done
wait

echo "Shutting down"
bin/ctdb shutdown -n all --socket=sock.1
killall -9 ctdbd
exit 0

View File

@ -29,6 +29,7 @@
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
#include "../common/rb_tree.h"
#include "db_wrap.h"
static void usage(void);
@ -331,7 +332,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
printf(":Node:IP:Disconnected:Banned:Disabled:Unhealthy:\n");
for(i=0;i<nodemap->num;i++){
printf(":%d:%s:%d:%d:%d:%d:\n", nodemap->nodes[i].pnn,
inet_ntoa(nodemap->nodes[i].sin.sin_addr),
ctdb_addr_to_str(&nodemap->nodes[i].addr),
!!(nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED),
!!(nodemap->nodes[i].flags&NODE_FLAGS_BANNED),
!!(nodemap->nodes[i].flags&NODE_FLAGS_PERMANENTLY_DISABLED),
@ -369,7 +370,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
CTDB_NO_MEMORY_FATAL(ctdb, flags_str);
}
printf("pnn:%d %-16s %s%s\n", nodemap->nodes[i].pnn,
inet_ntoa(nodemap->nodes[i].sin.sin_addr),
ctdb_addr_to_str(&nodemap->nodes[i].addr),
flags_str,
nodemap->nodes[i].pnn == mypnn?" (THIS NODE)":"");
talloc_free(flags_str);
@ -413,30 +414,29 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv
static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char **argv)
{
struct ctdb_control_tcp_tickle_list *list;
struct sockaddr_in ip;
ctdb_sock_addr addr;
int i, ret;
if (argc < 1) {
usage();
}
ip.sin_family = AF_INET;
if (inet_aton(argv[0], &ip.sin_addr) == 0) {
if (parse_ip(argv[0], &addr) == 0) {
DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
return -1;
}
ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.pnn, ctdb, &ip, &list);
ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.pnn, ctdb, &addr, &list);
if (ret == -1) {
DEBUG(DEBUG_ERR, ("Unable to list tickles\n"));
return -1;
}
printf("Tickles for ip:%s\n", inet_ntoa(list->ip.sin_addr));
printf("Tickles for ip:%s\n", ctdb_addr_to_str(&list->addr));
printf("Num tickles:%u\n", list->tickles.num);
for (i=0;i<list->tickles.num;i++) {
printf("SRC: %s:%u ", inet_ntoa(list->tickles.connections[i].saddr.sin_addr), ntohs(list->tickles.connections[i].saddr.sin_port));
printf("DST: %s:%u\n", inet_ntoa(list->tickles.connections[i].daddr.sin_addr), ntohs(list->tickles.connections[i].daddr.sin_port));
printf("SRC: %s:%u ", ctdb_addr_to_str(&list->tickles.connections[i].src_addr), ntohs(list->tickles.connections[i].src_addr.ip.sin_port));
printf("DST: %s:%u\n", ctdb_addr_to_str(&list->tickles.connections[i].dst_addr), ntohs(list->tickles.connections[i].dst_addr.ip.sin_port));
}
talloc_free(list);
@ -446,7 +446,7 @@ static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char *
/* send a release ip to all nodes */
static int control_send_release(struct ctdb_context *ctdb, uint32_t pnn,
struct sockaddr_in *sin)
ctdb_sock_addr *addr)
{
int ret;
struct ctdb_public_ip pip;
@ -460,11 +460,10 @@ struct sockaddr_in *sin)
}
/* send a moveip message to the recovery master */
pip.pnn = pnn;
pip.sin.sin_family = AF_INET;
pip.sin.sin_addr = sin->sin_addr;
pip.pnn = pnn;
pip.addr = *addr;
data.dsize = sizeof(pip);
data.dptr = (unsigned char *)&pip;
data.dptr = (unsigned char *)&pip;
/* send release ip to all nodes */
@ -485,7 +484,7 @@ struct sockaddr_in *sin)
static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv)
{
uint32_t pnn;
struct sockaddr_in ip;
ctdb_sock_addr addr;
uint32_t value;
struct ctdb_all_public_ips *ips;
int i, ret;
@ -494,8 +493,7 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
usage();
}
ip.sin_family = AF_INET;
if (inet_aton(argv[0], &ip.sin_addr) == 0) {
if (parse_ip(argv[0], &addr) == 0) {
DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
return -1;
}
@ -534,22 +532,22 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
}
for (i=0;i<ips->num;i++) {
if (ctdb_same_ipv4(&ip, &ips->ips[i].sin)) {
if (ctdb_same_ip(&addr, &ips->ips[i].addr)) {
break;
}
}
if (i==ips->num) {
DEBUG(DEBUG_ERR, ("Node %u can not host ip address '%s'\n",
pnn, inet_ntoa(ip.sin_addr)));
pnn, ctdb_addr_to_str(&addr)));
return -1;
}
if (ips->ips[i].pnn == pnn) {
DEBUG(DEBUG_ERR, ("Host %u is already hosting '%s'\n",
pnn, inet_ntoa(ips->ips[i].sin.sin_addr)));
pnn, ctdb_addr_to_str(&ips->ips[i].addr)));
return -1;
}
ret = control_send_release(ctdb, pnn, &ips->ips[i].sin);
ret = control_send_release(ctdb, pnn, &ips->ips[i].addr);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));;
return -1;
@ -558,20 +556,15 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv
return 0;
}
struct node_ip {
uint32_t pnn;
struct sockaddr_in sin;
};
void getips_store_callback(void *param, void *data)
{
struct node_ip *node_ip = (struct node_ip *)data;
struct ctdb_public_ip *node_ip = (struct ctdb_public_ip *)data;
struct ctdb_all_public_ips *ips = param;
int i;
i = ips->num++;
ips->ips[i].pnn = node_ip->pnn;
ips->ips[i].sin = node_ip->sin;
ips->ips[i].pnn = node_ip->pnn;
ips->ips[i].addr = node_ip->addr;
}
void getips_count_callback(void *param, void *data)
@ -581,12 +574,42 @@ void getips_count_callback(void *param, void *data)
(*count)++;
}
#define IP_KEYLEN 4
/*
  build the IP_KEYLEN-word lookup key for an address

  For AF_INET only key[0] is set (to the raw s_addr value) and the other
  words stay zero. For AF_INET6 the four 32-bit words of the address are
  stored in reverse order. An unknown family is logged and yields an
  all-zero key.

  The returned pointer refers to a static buffer that is overwritten by the
  next call, so the key must be consumed before ip_key() is called again.
*/
static uint32_t *ip_key(ctdb_sock_addr *ip)
{
	static uint32_t key[IP_KEYLEN];
	uint32_t words[4];

	memset(key, 0, sizeof(key));

	switch (ip->sa.sa_family) {
	case AF_INET:
		key[0]	= ip->ip.sin_addr.s_addr;
		break;
	case AF_INET6:
		/* copy out of the standard s6_addr byte array rather than
		   using the non-standard s6_addr32 accessor; the resulting
		   words are the same */
		memcpy(words, ip->ip6.sin6_addr.s6_addr, sizeof(words));
		key[0]	= words[3];
		key[1]	= words[2];
		key[2]	= words[1];
		key[3]	= words[0];
		break;
	default:
		DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
		break;
	}

	return key;
}
/*
  callback handed to trbt_insertarray32_callback() when an address is
  added to the ip tree: hands back 'parm' unchanged and ignores 'data'
*/
static void *add_ip_callback(void *parm, void *data)
{
	void *chosen = parm;

	(void)data;
	return chosen;
}
static int
control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struct ctdb_all_public_ips **ips)
{
struct ctdb_all_public_ips *tmp_ips;
struct ctdb_node_map *nodemap=NULL;
trbt_tree_t *tree;
trbt_tree_t *ip_tree;
int i, j, len, ret;
uint32_t count;
@ -596,7 +619,7 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc
return ret;
}
tree = trbt_create(tmp_ctx, 0);
ip_tree = trbt_create(tmp_ctx, 0);
for(i=0;i<nodemap->num;i++){
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
@ -611,25 +634,28 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc
}
for (j=0; j<tmp_ips->num;j++) {
struct node_ip *node_ip;
struct ctdb_public_ip *node_ip;
node_ip = talloc(tmp_ctx, struct node_ip);
node_ip->pnn = tmp_ips->ips[j].pnn;
node_ip->sin = tmp_ips->ips[j].sin;
node_ip = talloc(tmp_ctx, struct ctdb_public_ip);
node_ip->pnn = tmp_ips->ips[j].pnn;
node_ip->addr = tmp_ips->ips[j].addr;
trbt_insert32(tree, tmp_ips->ips[j].sin.sin_addr.s_addr, node_ip);
trbt_insertarray32_callback(ip_tree,
IP_KEYLEN, ip_key(&tmp_ips->ips[j].addr),
add_ip_callback,
node_ip);
}
talloc_free(tmp_ips);
}
/* traverse */
count = 0;
trbt_traversearray32(tree, 1, getips_count_callback, &count);
trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &count);
len = offsetof(struct ctdb_all_public_ips, ips) +
count*sizeof(struct ctdb_public_ip);
tmp_ips = talloc_zero_size(tmp_ctx, len);
trbt_traversearray32(tree, 1, getips_store_callback, tmp_ips);
trbt_traversearray32(ip_tree, IP_KEYLEN, getips_store_callback, tmp_ips);
*ips = tmp_ips;
@ -642,7 +668,7 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc
* ip address or -1
*/
static int
find_other_host_for_public_ip(struct ctdb_context *ctdb, struct sockaddr_in *addr)
find_other_host_for_public_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
{
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_all_public_ips *ips;
@ -672,7 +698,7 @@ find_other_host_for_public_ip(struct ctdb_context *ctdb, struct sockaddr_in *add
}
for (j=0;j<ips->num;j++) {
if (ctdb_same_ipv4(addr, &ips->ips[j].sin)) {
if (ctdb_same_ip(addr, &ips->ips[j].addr)) {
talloc_free(tmp_ctx);
return nodemap->nodes[i].pnn;
}
@ -692,7 +718,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
int i, ret;
int len;
unsigned mask;
struct sockaddr_in addr;
ctdb_sock_addr addr;
struct ctdb_control_ip_iface *pub;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_all_public_ips *ips;
@ -720,7 +746,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
pub = talloc_size(tmp_ctx, len);
CTDB_NO_MEMORY(ctdb, pub);
pub->sin = addr;
pub->addr = addr;
pub->mask = mask;
pub->len = strlen(argv[1])+1;
memcpy(&pub->iface[0], argv[1], strlen(argv[1])+1);
@ -737,7 +763,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
* we will claim it
*/
for (i=0;i<ips->num;i++) {
if (ctdb_same_ipv4(&addr, &ips->ips[i].sin)) {
if (ctdb_same_ip(&addr, &ips->ips[i].addr)) {
break;
}
}
@ -763,7 +789,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv)
static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
{
int i, ret;
struct sockaddr_in addr;
ctdb_sock_addr addr;
struct ctdb_control_ip_iface pub;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
struct ctdb_all_public_ips *ips;
@ -773,13 +799,12 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
usage();
}
addr.sin_family = AF_INET;
if (inet_aton(argv[0], &addr.sin_addr) == 0) {
if (parse_ip(argv[0], &addr) == 0) {
DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0]));
return -1;
}
pub.sin = addr;
pub.addr = addr;
pub.mask = 0;
pub.len = 0;
@ -791,14 +816,14 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv)
}
for (i=0;i<ips->num;i++) {
if (ctdb_same_ipv4(&addr, &ips->ips[i].sin)) {
if (ctdb_same_ip(&addr, &ips->ips[i].addr)) {
break;
}
}
if (i==ips->num) {
DEBUG(DEBUG_ERR, ("This node does not support this public address '%s'\n",
inet_ntoa(addr.sin_addr)));
ctdb_addr_to_str(&addr)));
talloc_free(tmp_ctx);
return -1;
}
@ -836,12 +861,12 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv)
usage();
}
if (!parse_ip_port(argv[0], (ctdb_sock_addr *)&killtcp.src)) {
if (!parse_ip_port(argv[0], &killtcp.src_addr)) {
DEBUG(DEBUG_ERR, ("Bad IP:port '%s'\n", argv[0]));
return -1;
}
if (!parse_ip_port(argv[1], (ctdb_sock_addr *)&killtcp.dst)) {
if (!parse_ip_port(argv[1], &killtcp.dst_addr)) {
DEBUG(DEBUG_ERR, ("Bad IP:port '%s'\n", argv[1]));
return -1;
}
@ -1051,9 +1076,9 @@ static int control_ip(struct ctdb_context *ctdb, int argc, const char **argv)
for (i=1;i<=ips->num;i++) {
if (options.machinereadable){
printf(":%s:%d:\n", inet_ntoa(ips->ips[ips->num-i].sin.sin_addr), ips->ips[ips->num-i].pnn);
printf(":%s:%d:\n", ctdb_addr_to_str(&ips->ips[ips->num-i].addr), ips->ips[ips->num-i].pnn);
} else {
printf("%s %d\n", inet_ntoa(ips->ips[ips->num-i].sin.sin_addr), ips->ips[ips->num-i].pnn);
printf("%s %d\n", ctdb_addr_to_str(&ips->ips[ips->num-i].addr), ips->ips[ips->num-i].pnn);
}
}
@ -1315,7 +1340,8 @@ static int control_lvs(struct ctdb_context *ctdb, int argc, const char **argv)
}
}
printf("%d:%s\n", i, inet_ntoa(nodemap->nodes[i].sin.sin_addr));
printf("%d:%s\n", i,
ctdb_addr_to_str(&nodemap->nodes[i].addr));
}
return 0;
@ -1490,60 +1516,6 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar
return 0;
}
/*
  show the reclock file used by a node and, for every node in the
  node map, whether that node is registered in the "<reclock>.pnn" file
 */
static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv)
{
	struct ctdb_node_map *nodemap = NULL;
	const char *reclock;
	char *pnnfile;
	int idx, ret, fd;

	ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, ("Unable to get reclock file from node %u\n", options.pnn));
		return ret;
	}

	ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
		return ret;
	}

	/* the per-node lock information lives next to the reclock file */
	pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock);
	CTDB_NO_MEMORY(ctdb, pnnfile);

	fd = open(pnnfile, O_RDONLY);
	if (fd == -1) {
		DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n",
			pnnfile, strerror(errno)));
		exit(10);
	}

	printf("Reclock file : %s\n", reclock);

	for (idx = 0; idx < nodemap->num; idx++) {
		/* -1 from ctdb_read_pnn_lock is reported as "not active" */
		int count = ctdb_read_pnn_lock(fd, nodemap->nodes[idx].pnn);

		printf("pnn:%d %-16s", nodemap->nodes[idx].pnn,
		       inet_ntoa(nodemap->nodes[idx].sin.sin_addr));
		if (count != -1) {
			printf(" ACTIVE with %d connections\n", count);
		} else {
			printf(" NOT ACTIVE\n");
		}
	}

	close(fd);
	return 0;
}
/*
check if the local node is recmaster or not
it will return 1 if this node is the recmaster and 0 if it is not
@ -1805,7 +1777,7 @@ static int control_attach(struct ctdb_context *ctdb, int argc, const char **argv
}
/*
dump memory usage
run an eventscript on a node
*/
static int control_eventscript(struct ctdb_context *ctdb, int argc, const char **argv)
{
@ -1836,6 +1808,331 @@ static int control_eventscript(struct ctdb_context *ctdb, int argc, const char *
return 0;
}
/* version of the on-disk backup format written by "ctdb backupdb" */
#define DB_VERSION 1
/* size of the database name field in the backup header, NUL included */
#define MAX_DB_NAME 64

/*
  header written at the start of a database backup file.
  It is followed by 'size' bytes of marshalled records.

  NOTE(review): the header is written as raw memory, so the field widths
  (unsigned long, time_t) depend on the platform that created the file;
  a backup is presumably only restorable on a matching ABI - confirm.
 */
struct db_file_header {
	unsigned long version;		/* DB_VERSION of the writer */
	time_t timestamp;		/* time the backup was taken */
	unsigned long persistent;	/* non-zero for a persistent database */
	unsigned long size;		/* number of bytes of record data that follow */
	/*
	  NUL terminated database name. This must not be const qualified:
	  the field is filled in by strncpy() when writing a backup and by
	  read() when restoring one.
	 */
	char name[MAX_DB_NAME];
};
/*
  state shared between control_backupdb() and its traverse callback
  backup_traverse() while the records of a database are collected
 */
struct backup_data {
/* marshalling buffer that every traversed record is appended to */
struct ctdb_marshall_buffer *records;
/* number of bytes of 'records' in use, including the buffer header */
uint32_t len;
/* number of records appended so far */
uint32_t total;
/* set by the traverse callback when a record could not be added */
bool traverse_error;
};
/*
  traverse callback used by control_backupdb(): marshall one key/data
  pair and append it to the buffer in the struct backup_data passed as
  'private'.

  returns 0 to continue the traverse. On failure traverse_error is set
  and -1 is returned, which stops the traverse.
 */
static int backup_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private)
{
	struct backup_data *bd = talloc_get_type(private, struct backup_data);
	struct ctdb_rec_data *rec;
	struct ctdb_marshall_buffer *grown;

	/* add the record */
	rec = ctdb_marshall_record(bd->records, 0, key, NULL, data);
	if (rec == NULL) {
		bd->traverse_error = true;
		DEBUG(DEBUG_ERR,("Failed to marshall record\n"));
		return -1;
	}

	/*
	  grow through a temporary so that bd->records keeps pointing at
	  the still valid old buffer if the reallocation fails
	 */
	grown = talloc_realloc_size(NULL, bd->records, rec->length + bd->len);
	if (grown == NULL) {
		DEBUG(DEBUG_ERR,("Failed to expand marshalling buffer\n"));
		talloc_free(rec);
		bd->traverse_error = true;
		return -1;
	}
	bd->records = grown;

	/* append the marshalled record after the bytes already in use */
	bd->records->count++;
	memcpy(bd->len+(uint8_t *)bd->records, rec, rec->length);
	bd->len += rec->length;
	talloc_free(rec);

	bd->total++;
	return 0;
}
/*
 * backup a database to a file
 *
 * argv[0] is the name of the database and argv[1] the file to write.
 * All records of the local copy of the database are collected inside a
 * tdb transaction and written to the file as a struct db_file_header
 * followed by the marshalled records.
 *
 * returns 0 on success and non-zero on failure. Every exit path
 * releases the temporary talloc context, the file descriptor and any
 * transaction that is still open.
 */
static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **argv)
{
	int i, ret;
	int status = -1;
	int fh = -1;
	bool in_transaction = false;
	ssize_t written;
	struct ctdb_dbid_map *dbmap=NULL;
	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
	struct db_file_header dbhdr;
	struct ctdb_db_context *ctdb_db = NULL;
	struct backup_data *bd;

	if (argc != 2) {
		DEBUG(DEBUG_ERR,("Invalid arguments\n"));
		goto done;
	}

	/*
	  the name, including its terminating NUL, must fit in the fixed
	  size header field. Check this before any work is done and
	  before the output file is created.
	 */
	if (strlen(argv[0]) >= MAX_DB_NAME) {
		DEBUG(DEBUG_ERR,("Too long dbname\n"));
		goto done;
	}

	ret = ctdb_ctrl_getdbmap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &dbmap);
	if (ret != 0) {
		DEBUG(DEBUG_ERR, ("Unable to get dbids from node %u\n", options.pnn));
		status = ret;
		goto done;
	}

	/* find the entry in the dbmap that carries the requested name */
	for(i=0;i<dbmap->num;i++){
		const char *name = NULL;
		bool match;

		ret = ctdb_ctrl_getdbname(ctdb, TIMELIMIT(), options.pnn, dbmap->dbs[i].dbid, tmp_ctx, &name);
		if (ret != 0 || name == NULL) {
			/* never compare against a name that was not filled in */
			DEBUG(DEBUG_ERR,("Unable to get name of database 0x%08x\n", dbmap->dbs[i].dbid));
			continue;
		}
		match = (strcmp(argv[0], name) == 0);
		talloc_free(discard_const(name));
		if (match) {
			break;
		}
	}
	if (i == dbmap->num) {
		DEBUG(DEBUG_ERR,("No database with name '%s' found\n", argv[0]));
		goto done;
	}

	ctdb_db = ctdb_attach(ctdb, argv[0], dbmap->dbs[i].persistent, 0);
	if (ctdb_db == NULL) {
		DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", argv[0]));
		goto done;
	}

	ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
	if (ret == -1) {
		DEBUG(DEBUG_ERR,("Failed to start transaction\n"));
		goto done;
	}
	in_transaction = true;

	bd = talloc_zero(tmp_ctx, struct backup_data);
	if (bd == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate backup_data\n"));
		goto done;
	}

	bd->records = talloc_zero(bd, struct ctdb_marshall_buffer);
	if (bd->records == NULL) {
		DEBUG(DEBUG_ERR,("Failed to allocate ctdb_marshall_buffer\n"));
		goto done;
	}

	/* the buffer starts out holding only its header */
	bd->len = offsetof(struct ctdb_marshall_buffer, data);
	bd->records->db_id = ctdb_db->db_id;

	/* traverse the database collecting all records */
	if (tdb_traverse_read(ctdb_db->ltdb->tdb, backup_traverse, bd) == -1 ||
	    bd->traverse_error) {
		DEBUG(DEBUG_ERR,("Traverse error\n"));
		goto done;
	}

	/* nothing was modified, the transaction is only dropped again */
	tdb_transaction_cancel(ctdb_db->ltdb->tdb);
	in_transaction = false;

	fh = open(argv[1], O_RDWR|O_CREAT, 0600);
	if (fh == -1) {
		DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[1]));
		goto done;
	}

	/* clear the header so no uninitialised padding ends up in the file */
	memset(&dbhdr, 0, sizeof(dbhdr));
	dbhdr.version = DB_VERSION;
	dbhdr.timestamp = time(NULL);
	dbhdr.persistent = dbmap->dbs[i].persistent;
	dbhdr.size = bd->len;
	/* length was verified above, so the copy is always NUL terminated */
	strncpy(discard_const(dbhdr.name), argv[0], MAX_DB_NAME);

	written = write(fh, &dbhdr, sizeof(dbhdr));
	if (written == -1 || (size_t)written != sizeof(dbhdr)) {
		DEBUG(DEBUG_ERR,("Failed to write header to file '%s'\n", argv[1]));
		goto done;
	}

	written = write(fh, bd->records, bd->len);
	if (written == -1 || (size_t)written != bd->len) {
		DEBUG(DEBUG_ERR,("Failed to write records to file '%s'\n", argv[1]));
		goto done;
	}

	/* a failing close() can mean that the data never reached the file */
	ret = close(fh);
	fh = -1;
	if (ret == -1) {
		DEBUG(DEBUG_ERR,("Failed to close file '%s'\n", argv[1]));
		goto done;
	}

	status = 0;

done:
	if (fh != -1) {
		close(fh);
	}
	if (in_transaction) {
		tdb_transaction_cancel(ctdb_db->ltdb->tdb);
	}
	talloc_free(tmp_ctx);
	return status;
}
/*
* restore a database from a file
*/
static int control_restoredb(struct ctdb_context *ctdb, int argc, const char **argv)
{
int ret;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
TDB_DATA outdata;
TDB_DATA data;
struct db_file_header dbhdr;
struct ctdb_db_context *ctdb_db;
struct ctdb_node_map *nodemap=NULL;
struct ctdb_vnn_map *vnnmap=NULL;
int fh;
struct ctdb_control_wipe_database w;
uint32_t *nodes;
uint32_t generation;
struct tm *tm;
char tbuf[100];
if (argc != 1) {
DEBUG(DEBUG_ERR,("Invalid arguments\n"));
return -1;
}
fh = open(argv[0], O_RDONLY);
if (fh == -1) {
DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[0]));
talloc_free(tmp_ctx);
return -1;
}
read(fh, &dbhdr, sizeof(dbhdr));
if (dbhdr.version != DB_VERSION) {
DEBUG(DEBUG_ERR,("Invalid version of database dump. File is version %lu but expected version was %u\n", dbhdr.version, DB_VERSION));
talloc_free(tmp_ctx);
return -1;
}
outdata.dsize = dbhdr.size;
outdata.dptr = talloc_size(tmp_ctx, outdata.dsize);
if (outdata.dptr == NULL) {
DEBUG(DEBUG_ERR,("Failed to allocate data of size '%lu'\n", dbhdr.size));
close(fh);
talloc_free(tmp_ctx);
return -1;
}
read(fh, outdata.dptr, outdata.dsize);
close(fh);
tm = localtime(&dbhdr.timestamp);
strftime(tbuf,sizeof(tbuf)-1,"%Y/%m/%d %H:%M:%S", tm);
printf("Restoring database '%s' from backup @ %s\n",
dbhdr.name, tbuf);
ctdb_db = ctdb_attach(ctdb, dbhdr.name, dbhdr.persistent, 0);
if (ctdb_db == NULL) {
DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", dbhdr.name));
talloc_free(tmp_ctx);
return -1;
}
ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn));
talloc_free(tmp_ctx);
return ret;
}
ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &vnnmap);
if (ret != 0) {
DEBUG(DEBUG_ERR, ("Unable to get vnnmap from node %u\n", options.pnn));
talloc_free(tmp_ctx);
return ret;
}
/* freeze all nodes */
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE,
nodes, TIMELIMIT(),
false, tdb_null,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Unable to freeze nodes.\n"));
ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
talloc_free(tmp_ctx);
return -1;
}
generation = vnnmap->generation;
data.dptr = (void *)&generation;
data.dsize = sizeof(generation);
/* start a cluster wide transaction */
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START,
nodes,
TIMELIMIT(), false, data,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Unable to start cluster wide transactions.\n"));
return -1;
}
w.db_id = ctdb_db->db_id;
w.transaction_id = generation;
data.dptr = (void *)&w;
data.dsize = sizeof(w);
/* wipe all the remote databases. */
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE,
nodes,
TIMELIMIT(), false, data,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Unable to wipe database.\n"));
ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
talloc_free(tmp_ctx);
return -1;
}
/* push the database */
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB,
nodes,
TIMELIMIT(), false, outdata,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Failed to push database.\n"));
ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
talloc_free(tmp_ctx);
return -1;
}
data.dptr = (void *)&generation;
data.dsize = sizeof(generation);
/* commit all the changes */
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT,
nodes,
TIMELIMIT(), false, data,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Unable to commit databases.\n"));
ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
talloc_free(tmp_ctx);
return -1;
}
/* thaw all nodes */
nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true);
if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW,
nodes, TIMELIMIT(),
false, tdb_null,
NULL, NULL,
NULL) != 0) {
DEBUG(DEBUG_ERR, ("Unable to thaw nodes.\n"));
ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE);
talloc_free(tmp_ctx);
return -1;
}
talloc_free(tmp_ctx);
return 0;
}
/*
dump memory usage
*/
@ -1922,7 +2219,7 @@ static int control_listnodes(struct ctdb_context *ctdb, int argc, const char **a
}
for(i=0;i<nodemap->num;i++){
printf("%s\n", inet_ntoa(nodemap->nodes[i].sin.sin_addr));
printf("%s\n", ctdb_addr_to_str(&nodemap->nodes[i].addr));
}
return 0;
@ -2030,11 +2327,12 @@ static const struct {
{ "repack", ctdb_repack, false, "repack all databases", "[max_freelist]"},
{ "listnodes", control_listnodes, false, "list all nodes in the cluster"},
{ "reloadnodes", control_reload_nodes_file, false, "reload the nodes file and restart the transport on all nodes"},
{ "getreclock", control_getreclock, false, "get the path to the reclock file" },
{ "moveip", control_moveip, false, "move/failover an ip address to another node", "<ip> <node>"},
{ "addip", control_addip, true, "add a ip address to a node", "<ip/mask> <iface>"},
{ "delip", control_delip, false, "delete an ip address from a node", "<ip>"},
{ "eventscript", control_eventscript, true, "run the eventscript with the given parameters on a node", "<arguments>"},
{ "backupdb", control_backupdb, false, "backup the database into a file.", "<database> <file>"},
{ "restoredb", control_restoredb, false, "restore the database from a file.", "<file>"},
};
/*

View File

@ -39,7 +39,7 @@ struct vacuum_data {
struct ctdb_db_context *ctdb_db;
trbt_tree_t *delete_tree;
uint32_t delete_count;
struct ctdb_control_pulldb_reply **list;
struct ctdb_marshall_buffer **list;
bool traverse_error;
uint32_t total;
};
@ -153,7 +153,7 @@ static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data,
}
struct delete_records_list {
struct ctdb_control_pulldb_reply *records;
struct ctdb_marshall_buffer *records;
};
/*
@ -269,16 +269,16 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
vdata->ctdb_db = ctdb_db;
/* the list needs to be of length num_nodes */
vdata->list = talloc_array(vdata, struct ctdb_control_pulldb_reply *, ctdb->vnn_map->size);
vdata->list = talloc_array(vdata, struct ctdb_marshall_buffer *, ctdb->vnn_map->size);
if (vdata->list == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
talloc_free(vdata);
return -1;
}
for (i=0;i<ctdb->vnn_map->size;i++) {
vdata->list[i] = (struct ctdb_control_pulldb_reply *)
vdata->list[i] = (struct ctdb_marshall_buffer *)
talloc_zero_size(vdata->list,
offsetof(struct ctdb_control_pulldb_reply, data));
offsetof(struct ctdb_marshall_buffer, data));
if (vdata->list[i] == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
talloc_free(vdata);
@ -332,9 +332,9 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
return -1;
}
recs->records = (struct ctdb_control_pulldb_reply *)
recs->records = (struct ctdb_marshall_buffer *)
talloc_zero_size(vdata,
offsetof(struct ctdb_control_pulldb_reply, data));
offsetof(struct ctdb_marshall_buffer, data));
if (recs->records == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
return -1;
@ -353,7 +353,7 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
(if possible)
*/
for (i=0;i<ctdb->vnn_map->size;i++) {
struct ctdb_control_pulldb_reply *records;
struct ctdb_marshall_buffer *records;
struct ctdb_rec_data *rec;
if (ctdb->vnn_map->map[i] == ctdb->pnn) {
@ -375,7 +375,7 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb
/* outdata countains the list of records coming back
from the node which the node could not delete
*/
records = (struct ctdb_control_pulldb_reply *)outdata.dptr;
records = (struct ctdb_marshall_buffer *)outdata.dptr;
rec = (struct ctdb_rec_data *)&records->data[0];
while (records->count-- > 1) {
TDB_DATA reckey, recdata;

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
# Run commands on CTDB nodes.
@ -171,9 +171,12 @@ trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted. However,
# the part of the window where it matter is very small.
retcode=0
for n in $nodes ; do
if $parallel ; then
if $verbose ; then
# pipefail is a bashism - is there some way to do this with plain sh?
set -o pipefail 2>/dev/null
($SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" 2>&1 | sed -e "s@^@[$n] @" )&
else
$SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" &
@ -184,8 +187,17 @@ for n in $nodes ; do
echo >&2 ; echo ">> NODE: $n <<" >&2
fi
$SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command"
$SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command"
[ $? = 0 ] || retcode=$?
fi
done
$parallel && wait
# In parallel mode, wait for each background job in turn and remember
# the exit status of a failing one.  The status has to be captured
# directly from "wait": after a "[ $? = 0 ]" test, $? holds the status
# of the test itself and not that of the job.
$parallel && {
    for p in $pids; do
	wait $p || retcode=$?
    done
}
exit $retcode

View File

@ -34,7 +34,7 @@
struct ipmux_node {
uint32_t pnn;
struct sockaddr_in sin;
ctdb_sock_addr addr;
};
struct ipmux_node *ipmux_nodes;
@ -188,8 +188,8 @@ int main(int argc, const char *argv[])
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
continue;
}
ipmux_nodes[num_nodes].pnn = i;
ipmux_nodes[num_nodes].sin = nodemap->nodes[i].sin;
ipmux_nodes[num_nodes].pnn = i;
ipmux_nodes[num_nodes].addr = nodemap->nodes[i].addr;
num_nodes++;
}
@ -251,7 +251,7 @@ int main(int argc, const char *argv[])
send the packet off and tell the kernel to not worry
about this packet any more
*/
ret = sendto(s, &ipqp->payload[0], ipqp->data_len, 0, &ipmux_nodes[hash].sin, sizeof(struct sockaddr_in));
ret = sendto(s, &ipqp->payload[0], ipqp->data_len, 0, (struct sockaddr_in *)&ipmux_nodes[hash].addr, sizeof(ctdb_sock_addr));
ipq_set_verdict(ipqh, ipqp->packet_id, NF_DROP, 0, pktbuf);
}

View File

@ -129,7 +129,7 @@ projects that want to make their services cluster aware using CTDB.
<h2>Developers</h2>
<ul>
<li><a href="http://samba.org/~tridge/">Andrew Tridgell</a><br>
<li>Ronnie Sahlberg<br>
<li><a href="http://samba.org/~sahlberg/">Ronnie Sahlberg</a><br>
<li>Peter Somogyi<br>
<li><a href="http://sernet.de/Samba/">Volker Lendecke</a><br>
</ul>