1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-22 13:34:15 +03:00

s4:cluster: remove old/broken ctdb code

We don't need a full copy of ctdb...
If we want to readd cluster support,
we should just use a ctdb client library.

metze
This commit is contained in:
Stefan Metzmacher 2008-12-29 11:02:32 +01:00
parent aec16efaf2
commit e36d64728f
97 changed files with 1 additions and 28446 deletions

View File

@ -70,7 +70,6 @@ dynconfigsrcdir := $(samba4srcdir)/dynconfig
# Per-component source directories, each defined relative to $(samba4srcdir).
heimdalsrcdir := $(samba4srcdir)/heimdal
dsdbsrcdir := $(samba4srcdir)/dsdb
smbdsrcdir := $(samba4srcdir)/smbd
clustersrcdir := $(samba4srcdir)/cluster
libnetsrcdir := $(samba4srcdir)/libnet
authsrcdir := $(samba4srcdir)/auth
# nsswitch lives one level above $(samba4srcdir).
nsswitchsrcdir := $(samba4srcdir)/../nsswitch

View File

@ -1,7 +1,3 @@
# ctdb sources live in the ctdb/ subdirectory of the cluster directory.
ctdbsrcdir = $(clustersrcdir)/ctdb
# Pull in the ctdb build description.
mkinclude ctdb/config.mk
# CLUSTER subsystem: built from cluster.o and local.o, with a private
# dependency on ctdb.
[SUBSYSTEM::CLUSTER]
PRIVATE_DEPENDENCIES = ctdb
CLUSTER_OBJ_FILES = $(addprefix $(clustersrcdir)/, cluster.o local.o)

View File

@ -1,158 +0,0 @@
#!gmake
#
# Makefile template: every @NAME@ token is a substitution placeholder
# (presumably filled in by ./configure -- confirm against the configure script).
#
# Compiler and installation directory layout.
CC = @CC@
prefix = @prefix@
exec_prefix = @exec_prefix@
datarootdir = @datarootdir@
includedir = @includedir@
libdir = @libdir@
bindir = @bindir@
sbindir = @sbindir@
mandir = @mandir@
localstatedir = @localstatedir@
# Extra directories searched for prerequisites (talloc, tdb, libreplace, popt).
VPATH = @srcdir@:@tdbdir@:@tallocdir@:@libreplacedir@:@poptdir@
srcdir = @srcdir@
etcdir = @sysconfdir@
builddir = @builddir@
# Prefix prepended to every path used by the install target; defaults to "/".
DESTDIR = /
EXTRA_OBJ=@EXTRA_OBJ@
# Stylesheet processor used by the doc rules; hard-coded path.
XSLTPROC = /usr/bin/xsltproc
INSTALLCMD = @INSTALL@
# popt link flags, compile flags and objects.
POPT_LIBS = @POPT_LIBS@
POPT_CFLAGS = @POPT_CFLAGS@
POPT_OBJ = @POPT_OBJ@
# Compile flags: include paths, the VARDIR/ETCDIR string macros, and whatever
# the @CFLAGS@ placeholder and popt contribute.
CFLAGS=-g -I$(srcdir)/include -Iinclude -Ilib -Ilib/util -I$(srcdir) \
-I@tallocdir@ -I@tdbdir@/include -I@libreplacedir@ \
-DVARDIR=\"$(localstatedir)\" -DETCDIR=\"$(etcdir)\" \
-DUSE_MMAP=1 @CFLAGS@ $(POPT_CFLAGS)
# Flags appended to every link command below.
LIB_FLAGS=@LDFLAGS@ -Llib @LIBS@ $(POPT_LIBS) @INFINIBAND_LIBS@
# Generic utility objects.
UTIL_OBJ = lib/util/idtree.o lib/util/db_wrap.o lib/util/strlist.o lib/util/util.o \
lib/util/util_time.o lib/util/util_file.o
# Objects shared by the client and the server.
CTDB_COMMON_OBJ = common/ctdb_io.o common/ctdb_util.o \
common/ctdb_ltdb.o common/ctdb_message.o common/cmdline.o \
lib/util/debug.o common/system.o
# TCP transport objects.
CTDB_TCP_OBJ = tcp/tcp_connect.o tcp/tcp_io.o tcp/tcp_init.o
# Everything a client program links against.
CTDB_CLIENT_OBJ = client/ctdb_client.o \
$(CTDB_COMMON_OBJ) $(POPT_OBJ) $(UTIL_OBJ) @TALLOC_OBJ@ @TDB_OBJ@ \
@LIBREPLACEOBJ@ $(EXTRA_OBJ) @EVENTS_OBJ@
# The daemon: server objects plus the full client object set and the transports.
CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
server/ctdb_recoverd.o server/ctdb_recover.o server/ctdb_freeze.o \
server/ctdb_tunables.o server/ctdb_monitor.o server/ctdb_server.o \
server/ctdb_control.o server/ctdb_call.o server/ctdb_ltdb_server.o \
server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
$(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
# Program lists: test programs, user tools, and the daemon.
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store @INFINIBAND_BINS@
BINS = bin/ctdb bin/scsi_io
SBINS = bin/ctdbd
# Directories created by the dirs target.
DIRS = lib bin
# Suffixes used by the suffix rules in this file (C objects and man pages).
.SUFFIXES: .c .o .h .1 .1.xml .1.html
# Default target: print the flags, create directories, then build docs,
# objects and all programs.
all: showflags dirs doc $(CTDB_SERVER_OBJ) $(CTDB_CLIENT_OBJ) $(BINS) $(SBINS) $(TEST_BINS)
# Print the flags this build uses. The link flags are held in LIB_FLAGS;
# no LIBS variable is assigned in this file, so echoing $(LIBS) printed
# an empty value.
showflags:
	@echo 'ctdb will be compiled with flags:'
	@echo ' CFLAGS = $(CFLAGS)'
	@echo ' LIBS = $(LIB_FLAGS)'
# Suffix rule: compile a C source into an object, creating the object's
# directory first.
.c.o:
	@echo Compiling $*.c
	@mkdir -p $(@D)
	@$(CC) $(CFLAGS) -c $< -o $@

# Create the output directories listed in DIRS.
dirs:
	@mkdir -p $(DIRS)
# Link rules. Every program is linked with $(CFLAGS), its own object(s),
# the shared object set and $(LIB_FLAGS).
#
# The daemon.
bin/ctdbd: $(CTDB_SERVER_OBJ)
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ $(CTDB_SERVER_OBJ) $(LIB_FLAGS)
# scsi_io tool.
bin/scsi_io: $(CTDB_CLIENT_OBJ) scsi/scsi_io.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ scsi/scsi_io.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
# ctdb command line tool.
bin/ctdb: $(CTDB_CLIENT_OBJ) tools/ctdb.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tools/ctdb.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
# Test programs.
bin/ctdb_bench: $(CTDB_CLIENT_OBJ) tests/ctdb_bench.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_bench.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
bin/ctdb_fetch: $(CTDB_CLIENT_OBJ) tests/ctdb_fetch.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_fetch.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
bin/ctdb_store: $(CTDB_CLIENT_OBJ) tests/ctdb_store.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ tests/ctdb_store.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
# Not named in BINS or TEST_BINS directly; presumably pulled in through the
# @INFINIBAND_BINS@ placeholder -- confirm.
bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o
@echo Linking $@
@$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
# Documentation rules: turn a .1.xml source into a man page or an HTML page.
# The leading '-' ignores failures, and the 'test -z' guard skips the step
# when XSLTPROC is empty. The stylesheets are referenced by http URL.
.1.xml.1:
-test -z "$(XSLTPROC)" || $(XSLTPROC) -o $@ http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl $<
.1.xml.1.html:
-test -z "$(XSLTPROC)" || $(XSLTPROC) -o $@ http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl $<
# All generated documentation: man and HTML pages for ctdb, ctdbd and onnode.
doc: doc/ctdb.1 doc/ctdb.1.html \
doc/ctdbd.1 doc/ctdbd.1.html \
doc/onnode.1 doc/onnode.1.html
# Remove object files (up to two directory levels deep), editor backups and
# the built programs.
clean:
rm -f *.o */*.o */*/*.o */*~
rm -f $(BINS) $(SBINS) $(TEST_BINS)
# clean, plus the bin directory, the configure outputs and the generated
# Makefile itself.
distclean: clean
rm -f *~ */*~
rm -rf bin
rm -f config.log config.status config.cache config.h
rm -f Makefile
# Install programs, headers, event scripts and man pages below $(DESTDIR).
install: all
# Create the destination directories.
mkdir -p $(DESTDIR)$(bindir)
mkdir -p $(DESTDIR)$(sbindir)
mkdir -p $(DESTDIR)$(includedir)
mkdir -p $(DESTDIR)$(etcdir)/ctdb
mkdir -p $(DESTDIR)$(etcdir)/ctdb/events.d
# Programs and headers.
${INSTALLCMD} -m 755 bin/ctdb $(DESTDIR)$(bindir)
${INSTALLCMD} -m 755 bin/ctdbd $(DESTDIR)$(sbindir)
${INSTALLCMD} -m 644 include/ctdb.h $(DESTDIR)$(includedir)
${INSTALLCMD} -m 644 include/ctdb_private.h $(DESTDIR)$(includedir) # for samba3
# Configuration helpers and event scripts.
${INSTALLCMD} -m 755 config/events $(DESTDIR)$(etcdir)/ctdb
${INSTALLCMD} -m 755 config/functions $(DESTDIR)$(etcdir)/ctdb
${INSTALLCMD} -m 755 config/statd-callout $(DESTDIR)$(etcdir)/ctdb
${INSTALLCMD} -m 755 config/events.d/10.interface $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/40.vsftpd $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/50.samba $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/60.nfs $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 config/events.d/61.nfstickle $(DESTDIR)$(etcdir)/ctdb/events.d
${INSTALLCMD} -m 755 tools/onnode.ssh $(DESTDIR)$(bindir)
${INSTALLCMD} -m 755 tools/onnode.rsh $(DESTDIR)$(bindir)
# Man pages are installed only if they were generated; the man1 directory is
# created only when doc/ctdb.1 exists.
if [ -f doc/ctdb.1 ];then ${INSTALLCMD} -d $(DESTDIR)$(mandir)/man1; fi
if [ -f doc/ctdb.1 ];then ${INSTALLCMD} -m 644 doc/ctdb.1 $(DESTDIR)$(mandir)/man1; fi
if [ -f doc/ctdbd.1 ];then ${INSTALLCMD} -m 644 doc/ctdbd.1 $(DESTDIR)$(mandir)/man1; fi
if [ -f doc/onnode.1 ];then ${INSTALLCMD} -m 644 doc/onnode.1 $(DESTDIR)$(mandir)/man1; fi
# "onnode" is a symlink to the ssh variant.
cd $(DESTDIR)$(bindir) && ln -sf onnode.ssh onnode
# Run the test script after building everything.
test: all
tests/run_tests.sh
# Same test script, with the VALGRIND environment variable set for it.
valgrindtest: all
VALGRIND="valgrind -q --trace-children=yes" tests/run_tests.sh
# distclean, plus the generated configure script and config.h.in.
realdistclean: distclean
rm -f configure config.h.in

View File

@ -1 +0,0 @@
m4_include(libreplace.m4)

View File

@ -1,17 +0,0 @@
#!/bin/sh
# Regenerate config.h.in and configure from scratch.

# Drop stale generated files and the autom4te cache first.
rm -rf autom4te.cache
rm -f configure config.h.in

# Build the -I search list handed to autoheader and autoconf: candidate
# locations for the libreplace, talloc, tdb and popt directories.
IPATHS=""
for dir in \
	libreplace lib/replace ../libreplace ../replace \
	lib/talloc talloc ../talloc \
	lib/tdb tdb ../tdb \
	lib/popt popt ../popt
do
	IPATHS="$IPATHS -I $dir"
done

# Stop with status 1 as soon as either generator fails.
autoheader $IPATHS || exit 1
autoconf $IPATHS || exit 1

rm -rf autom4te.cache

echo "Now run ./configure and then make."

exit 0

View File

@ -1,945 +0,0 @@
/*
Unix SMB/CIFS implementation.
generic byte range locking code - ctdb backend
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/filesys.h"
#include "../tdb/include/tdb.h"
#include "messaging/messaging.h"
#include "lib/messaging/irpc.h"
#include "libcli/libcli.h"
#include "cluster/cluster.h"
#include "ntvfs/ntvfs.h"
#include "ntvfs/common/brlock.h"
#include "include/ctdb.h"
/* call ids under which the brlock handlers in this file are invoked */
enum my_functions {
	FUNC_BRL_LOCK           = 1,
	FUNC_BRL_UNLOCK         = 2,
	FUNC_BRL_REMOVE_PENDING = 3,
	FUNC_BRL_LOCKTEST       = 4,
	FUNC_BRL_CLOSE          = 5
};
/*
in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
a file. For a local posix filesystem this will usually be a combination
of the device and inode numbers of the file, but it can be anything
that uniquely identifies a file for locking purposes, as long
as it is applied consistently.
*/
/* this struct is typically attached to tcon */
struct brl_context {
/* ctdb context taken from cluster_backend_handle() */
struct ctdb_context *ctdb;
/* handle of the "brlock" ctdb database all calls are issued against */
struct ctdb_db_context *ctdb_db;
/* our server id; copied into every request sent to the call handlers */
struct server_id server;
/* messaging context used to send MSG_BRL_RETRY notifications */
struct messaging_context *messaging_ctx;
};
/*
the lock context contains the elements that define whether one
lock is the same as another lock
(all three members are compared by brl_ctdb_same_context())
*/
struct lock_context {
/* server id of the lock owner */
struct server_id server;
/* SMB process id of the lock owner */
uint16_t smbpid;
/* brl context the lock was requested through; compared by pointer value */
struct brl_context *ctx;
};
/* The data in brlock records is an unsorted linear array of these
records. It is unnecessary to store the count as tdb provides the
size of the record.
The member layout is part of the stored record format: the call handlers
cast the raw record bytes to an array of this struct. */
struct lock_struct {
/* who owns the lock */
struct lock_context context;
/* file handle the lock was taken through */
struct ntvfs_handle *ntvfs;
/* first byte of the locked range */
uint64_t start;
/* length of the locked range in bytes */
uint64_t size;
/* READ_LOCK / WRITE_LOCK, or one of the PENDING_* variants */
enum brl_type lock_type;
/* opaque pointer sent back with MSG_BRL_RETRY for pending locks */
void *notify_ptr;
};
/* this struct is attached to an open file handle */
struct brl_handle {
/* blob identifying the file; used as the database key for its lock record */
DATA_BLOB key;
/* the ntvfs file handle this brl handle belongs to */
struct ntvfs_handle *ntvfs;
/* the most recent failed lock, used by brl_ctdb_lock_failed() to pick
the error code */
struct lock_struct last_lock;
};
/* Debug helper, compiled out by the #if 0: prints every entry of a lock
array (range, owner and handle pointers) at debug level 0. */
#if 0
static void show_locks(const char *op, struct lock_struct *locks, int count)
{
int i;
DEBUG(0,("OP: %s\n", op));
if (locks == NULL) return;
for (i=0;i<count;i++) {
/* start and size are truncated to int for printing */
DEBUG(0,("%2d: %4d %4d %d.%d.%d %p %p\n",
i, (int)locks[i].start, (int)locks[i].size,
locks[i].context.server.node,
locks[i].context.server.id,
locks[i].context.smbpid,
locks[i].context.ctx,
locks[i].ntvfs));
}
}
#endif
/*
  Set up a byte range lock context backed by the "brlock" ctdb
  database. Release it with talloc_free(). We need the messaging_ctx
  to allow for pending lock notifications.

  mem_ctx       - talloc parent of the returned context
  server        - our server id; copied into every lock request
  lp_ctx        - not used by this backend
  messaging_ctx - stored for sending MSG_BRL_RETRY messages

  Returns NULL if the cluster backend handle is not a ctdb context, if
  allocation fails, or if no "brlock" database handle is available.
*/
static struct brl_context *brl_ctdb_init(TALLOC_CTX *mem_ctx, struct server_id server, struct loadparm_context *lp_ctx,
					 struct messaging_context *messaging_ctx)
{
	struct ctdb_context *ctdb = talloc_get_type(cluster_backend_handle(),
						    struct ctdb_context);
	struct brl_context *brl;

	/* talloc_get_type() gives NULL for a missing handle or one of a
	   different type; never pass that on to ctdb_db_handle() */
	if (ctdb == NULL) {
		DEBUG(0,("Failed to get ctdb context for brlock\n"));
		return NULL;
	}

	brl = talloc(mem_ctx, struct brl_context);
	if (brl == NULL) {
		return NULL;
	}

	brl->ctdb = ctdb;
	brl->ctdb_db = ctdb_db_handle(ctdb, "brlock");
	if (brl->ctdb_db == NULL) {
		DEBUG(0,("Failed to get attached ctdb db handle for brlock\n"));
		talloc_free(brl);
		return NULL;
	}
	brl->server = server;
	brl->messaging_ctx = messaging_ctx;

	return brl;
}
/*
  Allocate the per-open-file lock handle. The key blob is copied by
  value (its data pointer is shared, not duplicated) and the record of
  the last failed lock starts out zeroed. Returns NULL if the
  allocation fails.
*/
static struct brl_handle *brl_ctdb_create_handle(TALLOC_CTX *mem_ctx, struct ntvfs_handle *ntvfs,
						 DATA_BLOB *file_key)
{
	struct brl_handle *handle = talloc(mem_ctx, struct brl_handle);

	if (handle != NULL) {
		handle->key = *file_key;
		handle->ntvfs = ntvfs;
		ZERO_STRUCT(handle->last_lock);
	}

	return handle;
}
/*
see if two locking contexts are equal
*/
static bool brl_ctdb_same_context(struct lock_context *ctx1, struct lock_context *ctx2)
{
return (cluster_id_equal(&ctx1->server, &ctx2->server) &&
ctx1->smbpid == ctx2->smbpid &&
ctx1->ctx == ctx2->ctx);
}
/*
  Report whether the byte ranges of lck1 and lck2 overlap.
*/
static bool brl_ctdb_overlap(struct lock_struct *lck1,
			     struct lock_struct *lck2)
{
	bool same_range;

	/* identical non-empty ranges always overlap. This is not
	   redundant with the test below: it copes with locks that go
	   beyond the end of 64 bit file space, where start+size wraps */
	same_range = (lck1->size != 0 &&
		      lck1->start == lck2->start &&
		      lck1->size == lck2->size);
	if (same_range) {
		return true;
	}

	/* otherwise each range has to begin before the other one ends */
	return (lck1->start < (lck2->start+lck2->size) &&
		lck2->start < (lck1->start+lck1->size));
}
/*
  Decide whether lck2 may NOT be added while lck1 is in place.
  Returns true on conflict.
*/
static bool brl_ctdb_conflict(struct lock_struct *lck1,
			      struct lock_struct *lck2)
{
	bool any_pending = (lck1->lock_type >= PENDING_READ_LOCK ||
			    lck2->lock_type >= PENDING_READ_LOCK);
	bool both_readers = (lck1->lock_type == READ_LOCK &&
			     lck2->lock_type == READ_LOCK);

	/* pending locks don't conflict with anything, and two read
	   locks never conflict with each other */
	if (any_pending || both_readers) {
		return false;
	}

	/* a new read lock from the same context on the same handle is
	   allowed on top of the existing lock */
	if (brl_ctdb_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->ntvfs == lck2->ntvfs) {
		return false;
	}

	/* everything else conflicts exactly when the ranges overlap */
	return brl_ctdb_overlap(lck1, lck2);
}
/*
  Conflict check used for IO tests: like brl_ctdb_conflict(), but our
  own locks on the same fnum are ignored.
*/
static bool brl_ctdb_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
{
	bool any_pending = (lck1->lock_type >= PENDING_READ_LOCK ||
			    lck2->lock_type >= PENDING_READ_LOCK);
	bool both_readers = (lck1->lock_type == READ_LOCK &&
			     lck2->lock_type == READ_LOCK);

	/* pending locks don't conflict with anything; neither do two
	   read locks */
	if (any_pending || both_readers) {
		return false;
	}

	/*
	 * note that incoming write calls conflict with existing READ
	 * locks even if the context is the same. JRA. See LOCKTEST7
	 * in smbtorture.
	 */
	if (brl_ctdb_same_context(&lck1->context, &lck2->context) &&
	    lck1->ntvfs == lck2->ntvfs &&
	    (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
		return false;
	}

	return brl_ctdb_overlap(lck1, lck2);
}
/*
amazingly enough, w2k3 "remembers" whether the last lock failure
is the same as this one and changes its error code. I wonder if any
app depends on this?
*/
static NTSTATUS brl_ctdb_lock_failed(struct brl_handle *brlh, struct lock_struct *lock)
{
/*
* this function is only called for non pending lock!
*/
/* in SMB2 mode always return NT_STATUS_LOCK_NOT_GRANTED! */
if (lock->ntvfs->ctx->protocol == PROTOCOL_SMB2) {
return NT_STATUS_LOCK_NOT_GRANTED;
}
/*
* if the notify_ptr is non NULL,
* it means that we're at the end of a pending lock
* and the real lock is requested after the timeout went by
* In this case we need to remember the last_lock and always
* give FILE_LOCK_CONFLICT
*/
if (lock->notify_ptr) {
brlh->last_lock = *lock;
return NT_STATUS_FILE_LOCK_CONFLICT;
}
/*
* amazing the little things you learn with a test
* suite. Locks beyond this offset (as a 64 bit
* number!) always generate the conflict error code,
* unless the top bit is set
*/
if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
brlh->last_lock = *lock;
return NT_STATUS_FILE_LOCK_CONFLICT;
}
/*
* if the current lock matches the last failed lock on the file handle
* and starts at the same offset, then FILE_LOCK_CONFLICT should be returned
*/
if (cluster_id_equal(&lock->context.server, &brlh->last_lock.context.server) &&
lock->context.ctx == brlh->last_lock.context.ctx &&
lock->ntvfs == brlh->last_lock.ntvfs &&
lock->start == brlh->last_lock.start) {
return NT_STATUS_FILE_LOCK_CONFLICT;
}
brlh->last_lock = *lock;
return NT_STATUS_LOCK_NOT_GRANTED;
}
/* Request payload for FUNC_BRL_LOCK; passed as the raw call_data bytes.
The members mirror the fields of struct lock_struct and its context. */
struct ctdb_lock_req {
/* 16 bit, although the brl_lock entry point takes a uint32_t smbpid */
uint16_t smbpid;
uint64_t start;
uint64_t size;
/* may be a PENDING_* type, see brl_ctdb_lock_func() */
enum brl_type lock_type;
void *notify_ptr;
struct server_id server;
struct brl_context *brl;
struct ntvfs_handle *ntvfs;
};
/*
ctdb call handling brl_lock()

Registered for FUNC_BRL_LOCK. The request is read from call->call_data
and the current lock array from call->record_data. When the lock is
added, the new record contents (old locks followed by the new one) are
placed in call->new_data. The NTSTATUS result is stored in call->status.
Returns 0, or CTDB_ERR_NOMEM if an allocation fails.
*/
static int brl_ctdb_lock_func(struct ctdb_call_info *call)
{
struct ctdb_lock_req *req = (struct ctdb_lock_req *)call->call_data->dptr;
TDB_DATA dbuf;
int count=0, i;
struct lock_struct lock, *locks=NULL;
NTSTATUS status = NT_STATUS_OK;
/* if this is a pending lock, then with the chainlock held we
try to get the real lock. If we succeed then we don't need
to make it pending. This prevents a possible race condition
where the pending lock gets created after the lock that is
preventing the real lock gets removed */
if (req->lock_type >= PENDING_READ_LOCK) {
enum brl_type lock_type = req->lock_type;
/* temporarily rewrite the request to the matching real lock type
and run this handler once more on it */
req->lock_type = (req->lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
if (brl_ctdb_lock_func(call) == 0 && call->status == NT_STATUS_V(NT_STATUS_OK)) {
return 0;
}
/* the real lock failed: restore the pending type and carry on */
req->lock_type = lock_type;
}
dbuf = call->record_data;
ZERO_STRUCT(lock);
lock.context.smbpid = req->smbpid;
lock.context.server = req->server;
lock.context.ctx = req->brl;
lock.ntvfs = req->ntvfs;
lock.start = req->start;
lock.size = req->size;
lock.lock_type = req->lock_type;
lock.notify_ptr = req->notify_ptr;
if (dbuf.dptr) {
/* there are existing locks - make sure they don't conflict */
locks = (struct lock_struct *)dbuf.dptr;
count = dbuf.dsize / sizeof(*locks);
for (i=0; i<count; i++) {
if (brl_ctdb_conflict(&locks[i], &lock)) {
status = NT_STATUS_LOCK_NOT_GRANTED;
goto reply;
}
}
}
/* no conflict: build the new record as the old array plus our lock */
call->new_data = talloc(call, TDB_DATA);
if (call->new_data == NULL) {
return CTDB_ERR_NOMEM;
}
call->new_data->dptr = talloc_size(call, dbuf.dsize + sizeof(lock));
if (call->new_data->dptr == NULL) {
return CTDB_ERR_NOMEM;
}
/* NOTE(review): locks is NULL here when the record had no data;
presumably dbuf.dsize is 0 in that case - confirm */
memcpy(call->new_data->dptr, locks, dbuf.dsize);
memcpy(call->new_data->dptr+dbuf.dsize, &lock, sizeof(lock));
call->new_data->dsize = dbuf.dsize + sizeof(lock);
/* a pending lock is stored in the record but still reported to the
caller as not granted */
if (req->lock_type >= PENDING_READ_LOCK) {
status = NT_STATUS_LOCK_NOT_GRANTED;
}
reply:
call->status = NT_STATUS_V(status);
return 0;
}
/*
  Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
  which case a real lock is first tried, and if that fails then a
  pending lock is created. When the pending lock is triggered (by
  someone else closing an overlapping lock range) a messaging
  notification is sent, identified by the notify_ptr

  Returns the NTSTATUS produced by the FUNC_BRL_LOCK call, with
  NT_STATUS_LOCK_NOT_GRANTED refined by brl_ctdb_lock_failed(), or
  NT_STATUS_INTERNAL_DB_CORRUPTION if ctdb_call() itself fails.
*/
static NTSTATUS brl_ctdb_lock(struct brl_context *brl,
			      struct brl_handle *brlh,
			      uint32_t smbpid,
			      uint64_t start, uint64_t size,
			      enum brl_type lock_type,
			      void *notify_ptr)
{
	struct ctdb_lock_req req;
	struct ctdb_call call;
	int ret;
	NTSTATUS status;

	/* clear the whole call (including reply_data) so nothing
	   uninitialised is handed to ctdb_call() */
	ZERO_STRUCT(call);
	call.call_id = FUNC_BRL_LOCK;
	call.key.dptr = brlh->key.data;
	call.key.dsize = brlh->key.length;
	call.call_data.dptr = (uint8_t *)&req;
	call.call_data.dsize = sizeof(req);

	ZERO_STRUCT(req);
	req.smbpid = smbpid;	/* stored as 16 bit in the request */
	req.start = start;
	req.size = size;
	req.lock_type = lock_type;
	req.notify_ptr = notify_ptr;
	req.server = brl->server;
	req.brl = brl;
	req.ntvfs = brlh->ntvfs;

	ret = ctdb_call(brl->ctdb_db, &call);
	if (ret == -1) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	status = NT_STATUS(call.status);

	if (NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
		struct lock_struct lock;

		/* rebuild the lock description locally; it may be
		   copied into brlh->last_lock, so start from zeroes */
		ZERO_STRUCT(lock);
		lock.context.smbpid = smbpid;
		lock.context.server = brl->server;
		lock.context.ctx = brl;
		lock.ntvfs = brlh->ntvfs;
		lock.start = start;
		lock.size = size;
		lock.lock_type = lock_type;
		lock.notify_ptr = notify_ptr;
		status = brl_ctdb_lock_failed(brlh, &lock);
	}

	return status;
}
/*
we are removing a lock that might be holding up a pending lock. Scan
for pending locks that cover this range and if we find any then
notify the server that it should retry the lock. In this backend, we
notify by sending the list of locks that need to be notified on back
in the reply_data of the ctdb call. The caller then does the
messaging for us.

locks/count is the array that is scanned and removed_lock the lock whose
range is being freed. Matching entries are appended to call->reply_data.
Returns 0, or CTDB_ERR_NOMEM if an allocation fails.
*/
static int brl_ctdb_notify_unlock(struct ctdb_call_info *call,
struct lock_struct *locks, int count,
struct lock_struct *removed_lock)
{
int i, last_notice;
/* the last_notice logic is to prevent stampeding on a lock
range. It prevents us sending hundreds of notifies on the
same range of bytes. It doesn't prevent all possible
stampedes, but it does prevent the most common problem */
last_notice = -1;
for (i=0;i<count;i++) {
if (locks[i].lock_type >= PENDING_READ_LOCK &&
brl_ctdb_overlap(&locks[i], removed_lock)) {
struct lock_struct *nlocks;
int ncount;
/* skip pending locks overlapping the last pending write lock we
already queued a notification for */
if (last_notice != -1 && brl_ctdb_overlap(&locks[i], &locks[last_notice])) {
continue;
}
/* only pending write locks are remembered as last_notice */
if (locks[i].lock_type == PENDING_WRITE_LOCK) {
last_notice = i;
}
/* reply_data is created on first use */
if (call->reply_data == NULL) {
call->reply_data = talloc_zero(call, TDB_DATA);
if (call->reply_data == NULL) {
return CTDB_ERR_NOMEM;
}
}
/* add to the list of pending locks to notify caller of */
ncount = call->reply_data->dsize / sizeof(struct lock_struct);
nlocks = talloc_realloc(call->reply_data, call->reply_data->dptr,
struct lock_struct, ncount + 1);
if (nlocks == NULL) {
return CTDB_ERR_NOMEM;
}
call->reply_data->dptr = (uint8_t *)nlocks;
nlocks[ncount] = locks[i];
call->reply_data->dsize += sizeof(struct lock_struct);
}
}
return 0;
}
/*
  send notifications for all pending locks - the file is being closed by this
  user

  Each pending entry of locks[] is passed to brl_ctdb_notify_unlock() as
  the range being freed, which queues the pending locks overlapping it
  in call->reply_data. Returns 0, or the error returned by
  brl_ctdb_notify_unlock().
*/
static int brl_ctdb_notify_all(struct ctdb_call_info *call,
			       struct lock_struct *locks, int count)
{
	int i;

	for (i=0;i<count;i++) {
		/* test entry i itself; testing locks->lock_type would
		   only ever look at the first entry of the array */
		if (locks[i].lock_type >= PENDING_READ_LOCK) {
			int ret = brl_ctdb_notify_unlock(call, locks, count, &locks[i]);
			if (ret != 0) return ret;
		}
	}

	return 0;
}
/*
  Deliver the retry notifications collected by a call handler:
  reply_data holds an array of lock_struct entries, and a MSG_BRL_RETRY
  carrying the entry's notify_ptr is sent to each entry's server.
*/
static void brl_ctdb_notify_send(struct brl_context *brl, TDB_DATA *reply_data)
{
	struct lock_struct *entry = (struct lock_struct *)reply_data->dptr;
	int remaining = reply_data->dsize / sizeof(struct lock_struct);

	while (remaining-- > 0) {
		messaging_send_ptr(brl->messaging_ctx, entry->context.server,
				   MSG_BRL_RETRY, entry->notify_ptr);
		entry++;
	}
}
/* Request payload for FUNC_BRL_UNLOCK; passed as the raw call_data bytes.
It identifies the lock to remove by owner (smbpid, server, brl), file
handle and exact byte range. */
struct ctdb_unlock_req {
/* 16 bit, although the brl_unlock entry point takes a uint32_t smbpid */
uint16_t smbpid;
uint64_t start;
uint64_t size;
struct server_id server;
struct brl_context *brl;
struct ntvfs_handle *ntvfs;
};
/*
Unlock a range of bytes.

ctdb call handler registered for FUNC_BRL_UNLOCK. Looks for a lock with
the same context, file handle, start and size as the request. If one is
found, call->new_data receives the record without it and pending locks
overlapping the removed range are queued in call->reply_data; otherwise
call->status is NT_STATUS_RANGE_NOT_LOCKED. Returns 0, or an error code
if an allocation fails.
*/
static int brl_ctdb_unlock_func(struct ctdb_call_info *call)
{
struct ctdb_unlock_req *req = (struct ctdb_unlock_req *)call->call_data->dptr;
TDB_DATA dbuf;
int count, i;
struct lock_struct *locks, *lock;
struct lock_context context;
NTSTATUS status = NT_STATUS_OK;
dbuf = call->record_data;
context.smbpid = req->smbpid;
context.server = req->server;
context.ctx = req->brl;
/* there are existing locks - find a match */
locks = (struct lock_struct *)dbuf.dptr;
count = dbuf.dsize / sizeof(*locks);
/* first pass: a matching write lock is preferred */
for (i=0; i<count; i++) {
lock = &locks[i];
if (brl_ctdb_same_context(&lock->context, &context) &&
lock->ntvfs == req->ntvfs &&
lock->start == req->start &&
lock->size == req->size &&
lock->lock_type == WRITE_LOCK) {
break;
}
}
if (i < count) goto found;
/* second pass: accept any matching lock that is not pending */
for (i=0; i<count; i++) {
lock = &locks[i];
if (brl_ctdb_same_context(&lock->context, &context) &&
lock->ntvfs == req->ntvfs &&
lock->start == req->start &&
lock->size == req->size &&
lock->lock_type < PENDING_READ_LOCK) {
break;
}
}
found:
/* reached from either pass; i == count means nothing matched */
if (i < count) {
/* keep a copy of the lock being removed for the notify scan below */
struct lock_struct removed_lock = *lock;
call->new_data = talloc(call, TDB_DATA);
if (call->new_data == NULL) {
return CTDB_ERR_NOMEM;
}
call->new_data->dptr = talloc_size(call, dbuf.dsize - sizeof(*lock));
if (call->new_data->dptr == NULL) {
return CTDB_ERR_NOMEM;
}
call->new_data->dsize = dbuf.dsize - sizeof(*lock);
/* copy the entries before and after index i, leaving out entry i */
memcpy(call->new_data->dptr, locks, i*sizeof(*lock));
memcpy(call->new_data->dptr+i*sizeof(*lock), locks+i+1,
(count-(i+1))*sizeof(*lock));
/* with a single entry there is no other lock that could be pending */
if (count > 1) {
int ret = brl_ctdb_notify_unlock(call, locks, count, &removed_lock);
if (ret != 0) return ret;
}
}
if (i == count) {
/* we didn't find it */
status = NT_STATUS_RANGE_NOT_LOCKED;
}
call->status = NT_STATUS_V(status);
return 0;
}
/*
  Unlock a range of bytes.

  Issues the FUNC_BRL_UNLOCK ctdb call for the file identified by brlh
  and then sends retry notifications for the pending locks the call
  handed back in its reply data.

  Returns the NTSTATUS produced by the call, or
  NT_STATUS_INTERNAL_DB_CORRUPTION if ctdb_call() itself fails.
*/
static NTSTATUS brl_ctdb_unlock(struct brl_context *brl,
				struct brl_handle *brlh,
				uint32_t smbpid,
				uint64_t start, uint64_t size)
{
	struct ctdb_call call;
	struct ctdb_unlock_req req;
	int ret;

	/* clear the whole call so flags, status and reply_data never
	   carry stack garbage into ctdb_call() (brl_ctdb_lock sets
	   flags and status to 0 as well) */
	ZERO_STRUCT(call);
	call.call_id = FUNC_BRL_UNLOCK;
	call.key.dptr = brlh->key.data;
	call.key.dsize = brlh->key.length;
	call.call_data.dptr = (uint8_t *)&req;
	call.call_data.dsize = sizeof(req);

	ZERO_STRUCT(req);
	req.smbpid = smbpid;	/* stored as 16 bit in the request */
	req.start = start;
	req.size = size;
	req.server = brl->server;
	req.brl = brl;
	req.ntvfs = brlh->ntvfs;

	ret = ctdb_call(brl->ctdb_db, &call);
	if (ret == -1) {
		DEBUG(0,("ctdb_call failed - %s\n", __location__));
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	brl_ctdb_notify_send(brl, &call.reply_data);

	return NT_STATUS(call.status);
}
/* Request payload for FUNC_BRL_REMOVE_PENDING: a pending lock is identified
by the server that owns it and the notify_ptr it was created with. */
struct ctdb_remove_pending_req {
struct server_id server;
void *notify_ptr;
};
/*
  remove a pending lock. This is called when the caller has either
  given up trying to establish a lock or when they have succeeded in
  getting it. In either case they no longer need to be notified.

  ctdb call handler: the first pending entry with the requested
  notify_ptr and server is dropped from the record. call->status is
  NT_STATUS_RANGE_NOT_LOCKED when there is no such entry. Returns 0, or
  CTDB_ERR_NOMEM if an allocation fails.
*/
static int brl_ctdb_remove_pending_func(struct ctdb_call_info *call)
{
	struct ctdb_remove_pending_req *req = (struct ctdb_remove_pending_req *)call->call_data->dptr;
	TDB_DATA dbuf = call->record_data;
	struct lock_struct *locks = (struct lock_struct *)dbuf.dptr;
	int count = dbuf.dsize / sizeof(*locks);
	int i;

	/* locate the pending entry to drop */
	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->lock_type >= PENDING_READ_LOCK &&
		    lock->notify_ptr == req->notify_ptr &&
		    cluster_id_equal(&lock->context.server, &req->server)) {
			break;
		}
	}

	if (i == count) {
		/* we didn't find it */
		call->status = NT_STATUS_V(NT_STATUS_RANGE_NOT_LOCKED);
		return 0;
	}

	/* the new record is the old array minus entry i */
	call->new_data = talloc(call, TDB_DATA);
	if (call->new_data == NULL) {
		return CTDB_ERR_NOMEM;
	}

	call->new_data->dptr = talloc_size(call, dbuf.dsize - sizeof(*locks));
	if (call->new_data->dptr == NULL) {
		return CTDB_ERR_NOMEM;
	}
	call->new_data->dsize = dbuf.dsize - sizeof(*locks);

	memcpy(call->new_data->dptr, locks, i*sizeof(*locks));
	memcpy(call->new_data->dptr+i*sizeof(*locks), locks+i+1,
	       (count-(i+1))*sizeof(*locks));

	call->status = NT_STATUS_V(NT_STATUS_OK);
	return 0;
}
/*
  Remove the pending lock identified by notify_ptr from the lock record
  of the file behind brlh, by issuing the FUNC_BRL_REMOVE_PENDING ctdb
  call.

  Returns the NTSTATUS produced by the call, or
  NT_STATUS_INTERNAL_DB_CORRUPTION if ctdb_call() itself fails.
*/
static NTSTATUS brl_ctdb_remove_pending(struct brl_context *brl,
					struct brl_handle *brlh,
					void *notify_ptr)
{
	struct ctdb_call call;
	struct ctdb_remove_pending_req req;
	int ret;

	/* clear the whole call so flags, status and reply_data never
	   carry stack garbage into ctdb_call() */
	ZERO_STRUCT(call);
	call.call_id = FUNC_BRL_REMOVE_PENDING;
	call.key.dptr = brlh->key.data;
	call.key.dsize = brlh->key.length;
	call.call_data.dptr = (uint8_t *)&req;
	call.call_data.dsize = sizeof(req);

	ZERO_STRUCT(req);
	req.notify_ptr = notify_ptr;
	req.server = brl->server;

	ret = ctdb_call(brl->ctdb_db, &call);
	if (ret == -1) {
		DEBUG(0,("ctdb_call failed - %s\n", __location__));
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	return NT_STATUS(call.status);
}
/* Request payload for FUNC_BRL_LOCKTEST: describes the IO (owner, file
handle, byte range and lock type) that is checked against existing locks. */
struct ctdb_locktest_req {
/* 16 bit, although the brl_locktest entry point takes a uint32_t smbpid */
uint16_t smbpid;
uint64_t start;
uint64_t size;
enum brl_type lock_type;
struct brl_context *brl;
struct server_id server;
struct ntvfs_handle *ntvfs;
};
/*
remove a pending lock. This is called when the caller has either
given up trying to establish a lock or when they have succeeded in
getting it. In either case they no longer need to be notified.
*/
static int brl_ctdb_locktest_func(struct ctdb_call_info *call)
{
struct ctdb_locktest_req *req = (struct ctdb_locktest_req *)call->call_data->dptr;
TDB_DATA dbuf;
int count, i;
struct lock_struct *locks, lock;
NTSTATUS status = NT_STATUS_OK;
lock.context.smbpid = req->smbpid;
lock.context.server = req->server;
lock.context.ctx = req->brl;
lock.ntvfs = req->ntvfs;
lock.start = req->start;
lock.size = req->size;
lock.lock_type = req->lock_type;
dbuf = call->record_data;
/* there are existing locks - find a match */
locks = (struct lock_struct *)dbuf.dptr;
count = dbuf.dsize / sizeof(*locks);
for (i=0; i<count; i++) {
if (brl_ctdb_conflict_other(&locks[i], &lock)) {
status = NT_STATUS_FILE_LOCK_CONFLICT;
break;
}
}
call->status = NT_STATUS_V(status);
return 0;
}
/*
  Test if we are allowed to perform IO on a region of an open file

  Issues the FUNC_BRL_LOCKTEST ctdb call for the file identified by
  brlh. Returns the NTSTATUS produced by the call, or
  NT_STATUS_INTERNAL_DB_CORRUPTION if ctdb_call() itself fails.
*/
static NTSTATUS brl_ctdb_locktest(struct brl_context *brl,
				  struct brl_handle *brlh,
				  uint32_t smbpid,
				  uint64_t start, uint64_t size,
				  enum brl_type lock_type)
{
	struct ctdb_call call;
	struct ctdb_locktest_req req;
	int ret;

	/* clear the whole call so flags, status and reply_data never
	   carry stack garbage into ctdb_call() */
	ZERO_STRUCT(call);
	call.call_id = FUNC_BRL_LOCKTEST;
	call.key.dptr = brlh->key.data;
	call.key.dsize = brlh->key.length;
	call.call_data.dptr = (uint8_t *)&req;
	call.call_data.dsize = sizeof(req);

	ZERO_STRUCT(req);
	req.smbpid = smbpid;	/* stored as 16 bit in the request */
	req.start = start;
	req.size = size;
	req.lock_type = lock_type;
	req.server = brl->server;
	req.brl = brl;
	req.ntvfs = brlh->ntvfs;

	ret = ctdb_call(brl->ctdb_db, &call);
	if (ret == -1) {
		DEBUG(0,("ctdb_call failed - %s\n", __location__));
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	return NT_STATUS(call.status);
}
/* Request payload for FUNC_BRL_CLOSE: identifies the file handle being
closed (brl context, server id and ntvfs handle). */
struct ctdb_close_req {
struct brl_context *brl;
struct server_id server;
struct ntvfs_handle *ntvfs;
};
/*
  ctdb call handling brl_close()

  Removes every lock belonging to the file handle being closed (same
  brl context, server id and ntvfs handle) from the record. If any
  lock was removed, pending locks among the remaining ones are queued
  for notification in call->reply_data and the shortened array is
  returned in call->new_data.

  call->status is always NT_STATUS_OK. Returns 0, or an error code if
  an allocation fails.
*/
static int brl_ctdb_close_func(struct ctdb_call_info *call)
{
	struct ctdb_close_req *req = (struct ctdb_close_req *)call->call_data->dptr;
	TDB_DATA dbuf;
	int count, dcount=0, i;
	struct lock_struct *locks;
	NTSTATUS status = NT_STATUS_OK;

	dbuf = call->record_data;

	locks = (struct lock_struct *)dbuf.dptr;
	count = dbuf.dsize / sizeof(*locks);

	/* compact the array in place, dropping the entries of the
	   closing handle */
	for (i=0; i<count; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->context.ctx == req->brl &&
		    cluster_id_equal(&lock->context.server, &req->server) &&
		    lock->ntvfs == req->ntvfs) {
			/* found it - delete it */
			if (count > 1 && i < count-1) {
				memmove(&locks[i], &locks[i+1],
					sizeof(*locks)*((count-1) - i));
			}
			count--;
			/* look at index i again: it now holds the next entry */
			i--;
			dcount++;
		}
	}

	if (dcount > 0) {
		int ret;

		call->new_data = talloc(call, TDB_DATA);
		if (call->new_data == NULL) {
			return CTDB_ERR_NOMEM;
		}

		/* a failure to build the notification list must not be
		   silently dropped */
		ret = brl_ctdb_notify_all(call, locks, count);
		if (ret != 0) {
			return ret;
		}

		call->new_data->dptr = talloc_size(call, count*sizeof(struct lock_struct));
		if (call->new_data->dptr == NULL) {
			return CTDB_ERR_NOMEM;
		}
		call->new_data->dsize = count*sizeof(struct lock_struct);

		memcpy(call->new_data->dptr, locks, count*sizeof(struct lock_struct));
	}

	call->status = NT_STATUS_V(status);

	return 0;
}
/*
  Drop all locks held through a file handle that is being closed.

  Issues the FUNC_BRL_CLOSE ctdb call for the file identified by brlh
  and then sends retry notifications for the pending locks the call
  handed back in its reply data.

  Returns the NTSTATUS produced by the call, or
  NT_STATUS_INTERNAL_DB_CORRUPTION if ctdb_call() itself fails.
*/
static NTSTATUS brl_ctdb_close(struct brl_context *brl,
			       struct brl_handle *brlh)
{
	struct ctdb_call call;
	struct ctdb_close_req req;
	int ret;

	/* clear the whole call so flags, status and reply_data never
	   carry stack garbage into ctdb_call() */
	ZERO_STRUCT(call);
	call.call_id = FUNC_BRL_CLOSE;
	call.key.dptr = brlh->key.data;
	call.key.dsize = brlh->key.length;
	call.call_data.dptr = (uint8_t *)&req;
	call.call_data.dsize = sizeof(req);

	ZERO_STRUCT(req);
	req.brl = brl;
	req.server = brl->server;
	req.ntvfs = brlh->ntvfs;

	ret = ctdb_call(brl->ctdb_db, &call);
	if (ret == -1) {
		DEBUG(0,("ctdb_call failed - %s\n", __location__));
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	brl_ctdb_notify_send(brl, &call.reply_data);

	return NT_STATUS(call.status);
}
/* operations table that routes the generic brlock entry points to the
   ctdb backed implementations in this file */
static const struct brlock_ops brlock_tdb_ops = {
	.brl_init           = brl_ctdb_init,
	.brl_create_handle  = brl_ctdb_create_handle,
	.brl_lock           = brl_ctdb_lock,
	.brl_unlock         = brl_ctdb_unlock,
	.brl_remove_pending = brl_ctdb_remove_pending,
	.brl_locktest       = brl_ctdb_locktest,
	.brl_close          = brl_ctdb_close,
};
/*
  Install the ctdb brlock backend: select brlock_tdb_ops as the active
  operations table and register the call handlers of this file on the
  "brlock" ctdb database.

  The operations table is installed first. If the ctdb context or the
  database handle cannot be obtained, an error is logged and the call
  handlers are not registered.
*/
void brl_ctdb_init_ops(void)
{
	struct ctdb_context *ctdb = talloc_get_type(cluster_backend_handle(),
						    struct ctdb_context);
	struct ctdb_db_context *ctdb_db;

	brl_set_ops(&brlock_tdb_ops);

	/* talloc_get_type() gives NULL for a missing handle or one of a
	   different type; never pass that on to ctdb_db_handle() */
	if (ctdb == NULL) {
		DEBUG(0,("Failed to get ctdb context for brlock\n"));
		return;
	}

	ctdb_db = ctdb_db_handle(ctdb, "brlock");
	if (ctdb_db == NULL) {
		DEBUG(0,("Failed to get attached ctdb db handle for brlock\n"));
		return;
	}

	ctdb_set_call(ctdb_db, brl_ctdb_lock_func, FUNC_BRL_LOCK);
	ctdb_set_call(ctdb_db, brl_ctdb_unlock_func, FUNC_BRL_UNLOCK);
	ctdb_set_call(ctdb_db, brl_ctdb_remove_pending_func, FUNC_BRL_REMOVE_PENDING);
	ctdb_set_call(ctdb_db, brl_ctdb_locktest_func, FUNC_BRL_LOCKTEST);
	ctdb_set_call(ctdb_db, brl_ctdb_close_func, FUNC_BRL_CLOSE);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,132 +0,0 @@
/*
common commandline code to ctdb test tools
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
/* Handle common command line options for ctdb test progs
*/
static struct {
const char *socketname;
int torture;
const char *events;
} ctdb_cmdline = {
.socketname = CTDB_PATH,
.torture = 0,
};
/* option values that are handled by ctdb_cmdline_callback() */
enum {
	OPT_EVENTSYSTEM = 1
};

/*
  popt callback: --events selects the default event backend by name;
  any other option value is ignored here.
*/
static void ctdb_cmdline_callback(poptContext con,
				  enum poptCallbackReason reason,
				  const struct poptOption *opt,
				  const char *arg, const void *data)
{
	if (opt->val == OPT_EVENTSYSTEM) {
		event_set_default_backend(arg);
	}
}
/*
  popt table of the common ctdb options. The first entry installs
  ctdb_cmdline_callback(); the all-zero entry terminates the table.
*/
struct poptOption popt_ctdb_cmdline[] = {
	{ NULL,      0,   POPT_ARG_CALLBACK, (void *)ctdb_cmdline_callback, 0,               NULL,                          NULL },
	{ "socket",  0,   POPT_ARG_STRING,   &ctdb_cmdline.socketname,      0,               "local socket name",           "filename" },
	{ "debug",   'd', POPT_ARG_INT,      &LogLevel,                     0,               "debug level",                 NULL },
	{ "torture", 0,   POPT_ARG_NONE,     &ctdb_cmdline.torture,         0,               "enable nastiness in library", NULL },
	{ "events",  0,   POPT_ARG_STRING,   NULL,                          OPT_EVENTSYSTEM, "event system",                NULL },
	{ NULL,      0,   0,                 NULL,                          0,               NULL,                          NULL }
};
/*
  startup daemon side of ctdb according to command line options

  Creates the ctdb context on the given event context, applies the
  --torture flag and sets the socket name from --socket. Any failure
  is reported on stderr (as ctdb_cmdline_client() does) and terminates
  the process with exit status 1, so the returned pointer is never
  NULL.
*/
struct ctdb_context *ctdb_cmdline_init(struct event_context *ev)
{
	struct ctdb_context *ctdb;
	int ret;

	/* initialise ctdb */
	ctdb = ctdb_init(ev);
	if (ctdb == NULL) {
		fprintf(stderr, "Failed to init ctdb\n");
		exit(1);
	}

	if (ctdb_cmdline.torture) {
		ctdb_set_flags(ctdb, CTDB_FLAG_TORTURE);
	}

	/* tell ctdb the socket address */
	ret = ctdb_set_socketname(ctdb, ctdb_cmdline.socketname);
	if (ret == -1) {
		fprintf(stderr, "ctdb_set_socketname failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	return ctdb;
}
/*
  create a client-only ctdb context: set the socket name from the
  command line, connect to the daemon and fetch our vnn.

  Failing to create the context or to set the socket name exits the
  process; failing to connect or to obtain the vnn frees the context
  and returns NULL.
*/
struct ctdb_context *ctdb_cmdline_client(struct event_context *ev)
{
	/* initialise ctdb */
	struct ctdb_context *ctdb = ctdb_init(ev);

	if (ctdb == NULL) {
		fprintf(stderr, "Failed to init ctdb\n");
		exit(1);
	}

	/* tell ctdb the socket address */
	if (ctdb_set_socketname(ctdb, ctdb_cmdline.socketname) == -1) {
		fprintf(stderr, "ctdb_set_socketname failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	if (ctdb_socket_connect(ctdb) != 0) {
		fprintf(stderr, __location__ " Failed to connect to daemon\n");
		talloc_free(ctdb);
		return NULL;
	}

	/* get our vnn */
	ctdb->vnn = ctdb_ctrl_getvnn(ctdb, timeval_zero(), CTDB_CURRENT_NODE);
	if (ctdb->vnn != (uint32_t)-1) {
		return ctdb;
	}

	DEBUG(0,(__location__ " Failed to get ctdb vnn\n"));
	talloc_free(ctdb);
	return NULL;
}

View File

@ -1,338 +0,0 @@
/*
ctdb database library
Utility functions to read/write blobs of data from a file descriptor
and handle the case where we might need multiple read/writes to get all the
data.
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "lib/events/events.h"
#include "../lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "../include/ctdb.h"
/* structures for packet queueing - see common/ctdb_io.c */
/* bytes of an incoming packet that have been read but do not yet form a whole packet */
struct ctdb_partial {
uint8_t *data; /* bytes received so far; queue_io_read() resets this to NULL once it takes the buffer over */
uint32_t length; /* number of valid bytes in data */
};
/* one outgoing packet (or its unsent tail) waiting on ctdb_queue.out_queue */
struct ctdb_queue_pkt {
struct ctdb_queue_pkt *next, *prev; /* list links used by the DLIST_* macros */
uint8_t *data; /* next byte to write; queue_io_write() advances it after a partial write */
uint32_t length; /* bytes still to be written */
uint32_t full_length; /* length of the complete packet as computed by ctdb_queue_send() */
};
/* state of one packet queue bound to a file descriptor */
struct ctdb_queue {
struct ctdb_context *ctdb;
struct ctdb_partial partial; /* partial input packet */
struct ctdb_queue_pkt *out_queue; /* packets not yet fully written */
struct fd_event *fde; /* fd event created by ctdb_queue_set_fd(); NULL when there is none */
int fd; /* descriptor in use, or -1 */
size_t alignment; /* when non-zero, ctdb_queue_send() rounds packet lengths up with (len+a-1) & ~(a-1) */
void *private_data; /* handed back as the last argument of callback */
ctdb_queue_cb_fn_t callback; /* called once per complete packet, and with (NULL, 0) on failure */
};
/*
called when an incoming connection is readable

Reads the number of bytes FIONREAD reports as pending, appends them to
any partial packet kept from an earlier call, and passes every complete
packet to queue->callback.  A packet begins with a uint32_t, read in
host byte order, that holds the length of the whole packet including
that length field.  Bytes left over after the last complete packet are
stored in queue->partial for the next call.  Failures are reported by
calling the callback with (NULL, 0).
*/
static void queue_io_read(struct ctdb_queue *queue)
{
int num_ready = 0;
ssize_t nread;
uint8_t *data, *data_base;
if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) {
return;
}
if (num_ready == 0) {
/* the descriptor has been closed */
goto failed;
}
/* grow the partial buffer so the new bytes land directly after the old ones */
queue->partial.data = talloc_realloc(queue, queue->partial.data,
uint8_t,
num_ready + queue->partial.length);
if (queue->partial.data == NULL) {
DEBUG(0,("read error alloc failed for %u\n",
num_ready + queue->partial.length));
goto failed;
}
nread = read(queue->fd, queue->partial.data + queue->partial.length, num_ready);
if (nread <= 0) {
DEBUG(0,("read error nread=%d\n", (int)nread));
goto failed;
}
/* from here on 'data' is the buffer and nread counts old plus new bytes;
the queue no longer references the buffer */
data = queue->partial.data;
nread += queue->partial.length;
queue->partial.data = NULL;
queue->partial.length = 0;
/* fast path: the buffer is exactly one packet, so pass it on without copying */
if (nread >= 4 && *(uint32_t *)data == nread) {
/* it is the responsibility of the incoming packet
function to free 'data' */
queue->callback(data, nread, queue->private_data);
return;
}
data_base = data;
/* slow path: copy out each complete packet found in the buffer */
while (nread >= 4 && *(uint32_t *)data <= nread) {
/* we have at least one packet */
uint8_t *d2;
uint32_t len;
len = *(uint32_t *)data;
if (len == 0) {
/* bad packet! treat as EOF */
DEBUG(0,("Invalid packet of length 0\n"));
goto failed;
}
d2 = (uint8_t *)talloc_memdup(queue, data, len);
if (d2 == NULL) {
DEBUG(0,("read error memdup failed for %u\n", len));
/* sigh */
goto failed;
}
queue->callback(d2, len, queue->private_data);
data += len;
nread -= len;
}
if (nread > 0) {
/* we have only part of a packet */
if (data_base == data) {
/* nothing was consumed: keep the whole buffer as the partial packet */
queue->partial.data = data;
queue->partial.length = nread;
} else {
/* keep only the unconsumed tail and release the original buffer */
queue->partial.data = (uint8_t *)talloc_memdup(queue, data, nread);
if (queue->partial.data == NULL) {
DEBUG(0,("read error memdup partial failed for %u\n",
(unsigned)nread));
goto failed;
}
queue->partial.length = nread;
talloc_free(data_base);
}
return;
}
talloc_free(data_base);
return;
failed:
/* NOTE(review): the 'goto failed' paths taken after data_base is set do not
free data_base; it stays a talloc child of queue - confirm this is intended */
queue->callback(NULL, 0, queue->private_data);
}
/*
  timed-event handler scheduled when a write error has killed a queue:
  tells the queue's owner by invoking its callback with (NULL, 0)
*/
static void queue_dead(struct event_context *ev, struct timed_event *te,
		       struct timeval t, void *private_data)
{
	struct ctdb_queue *dead = talloc_get_type(private_data, struct ctdb_queue);

	dead->callback(NULL, 0, dead->private_data);
}
/*
  called when the queue's descriptor is writeable

  Writes queued packets from the head of out_queue until the list is
  empty or a write does not complete.  In torture mode only one byte
  is offered per write.  A hard write error drops the fd event, marks
  the fd as -1 and schedules queue_dead(); a partly sent head packet
  is discarded in that case.
*/
static void queue_io_write(struct ctdb_queue *queue)
{
	struct ctdb_queue_pkt *pkt;

	for (pkt = queue->out_queue; pkt != NULL; pkt = queue->out_queue) {
		size_t want = (queue->ctdb->flags & CTDB_FLAG_TORTURE) ? 1 : pkt->length;
		ssize_t n = write(queue->fd, pkt->data, want);

		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
			if (pkt->length != pkt->full_length) {
				/* partial packet sent - we have to drop it */
				DLIST_REMOVE(queue->out_queue, pkt);
				talloc_free(pkt);
			}
			talloc_free(queue->fde);
			queue->fde = NULL;
			queue->fd = -1;
			event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
					queue_dead, queue);
			return;
		}

		/* nothing written this time round: wait for the next event */
		if (n <= 0) {
			return;
		}

		/* short write: remember where to resume */
		if (n != pkt->length) {
			pkt->data += n;
			pkt->length -= n;
			return;
		}

		/* the whole packet went out */
		DLIST_REMOVE(queue->out_queue, pkt);
		talloc_free(pkt);
	}

	EVENT_FD_NOT_WRITEABLE(queue->fde);
}
/*
  fd event handler for a queue.  When EVENT_FD_READ is set only the
  read path runs; otherwise the write path runs.
*/
static void queue_io_handler(struct event_context *ev, struct fd_event *fde,
			     uint16_t flags, void *private_data)
{
	struct ctdb_queue *q = talloc_get_type(private_data, struct ctdb_queue);

	if (!(flags & EVENT_FD_READ)) {
		queue_io_write(q);
		return;
	}

	queue_io_read(q);
}
/*
  queue a packet for sending

  When the queue has an alignment, the length is rounded up, the
  rounded value is written into the first uint32_t of 'data' and the
  padding bytes after 'length' are zeroed - so the caller's buffer must
  have room for the rounded length.

  If nothing is queued and the fd is usable (and torture mode is off)
  an immediate write is attempted; whatever could not be written is
  copied and appended to out_queue.

  Returns 0 on success and also when a write error killed the
  connection (the dead queue is reported through a separate event).
  Returns -1, via CTDB_NO_MEMORY, when an allocation fails.
*/
int ctdb_queue_send(struct ctdb_queue *queue, uint8_t *data, uint32_t length)
{
	struct ctdb_queue_pkt *pkt;
	uint8_t *copy;
	uint32_t length2, full_length;

	if (queue->alignment) {
		/* enforce the length and alignment rules from the tcp packet allocator */
		length2 = (length+(queue->alignment-1)) & ~(queue->alignment-1);
		*(uint32_t *)data = length2;
	} else {
		length2 = length;
	}

	if (length2 != length) {
		memset(data+length, 0, length2-length);
	}

	full_length = length2;

	/* if the queue is empty then try an immediate write, avoiding
	   queue overhead. This relies on non-blocking sockets */
	if (queue->out_queue == NULL && queue->fd != -1 &&
	    !(queue->ctdb->flags & CTDB_FLAG_TORTURE)) {
		ssize_t n = write(queue->fd, data, length2);
		if (n == -1 && errno != EAGAIN && errno != EWOULDBLOCK) {
			talloc_free(queue->fde);
			queue->fde = NULL;
			queue->fd = -1;
			event_add_timed(queue->ctdb->ev, queue, timeval_zero(),
					queue_dead, queue);
			/* yes, we report success, as the dead node is
			   handled via a separate event */
			return 0;
		}
		if (n > 0) {
			data += n;
			length2 -= n;
		}
		if (length2 == 0) return 0;
	}

	pkt = talloc(queue, struct ctdb_queue_pkt);
	CTDB_NO_MEMORY(queue->ctdb, pkt);

	/* copy into a local first so that a failed copy does not leave an
	   unused pkt hanging off the queue */
	copy = (uint8_t *)talloc_memdup(pkt, data, length2);
	if (copy == NULL) {
		talloc_free(pkt);
	}
	CTDB_NO_MEMORY(queue->ctdb, copy);

	pkt->data = copy;
	pkt->length = length2;
	pkt->full_length = full_length;

	/* first queued packet: ask to be told when the fd becomes writeable */
	if (queue->out_queue == NULL && queue->fd != -1) {
		EVENT_FD_WRITEABLE(queue->fde);
	}

	DLIST_ADD_END(queue->out_queue, pkt, struct ctdb_queue_pkt *);

	return 0;
}
/*
  attach a (new) descriptor to the queue, replacing any previous fd
  event.  With fd == -1 the queue is simply left without an event.
  If packets are already queued the new event is marked writeable.
  Returns 0 on success, -1 if the fd event could not be created.
*/
int ctdb_queue_set_fd(struct ctdb_queue *queue, int fd)
{
	queue->fd = fd;
	talloc_free(queue->fde);
	queue->fde = NULL;

	if (fd == -1) {
		return 0;
	}

	queue->fde = event_add_fd(queue->ctdb->ev, queue, fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
				  queue_io_handler, queue);
	if (queue->fde == NULL) {
		return -1;
	}

	if (queue->out_queue) {
		EVENT_FD_WRITEABLE(queue->fde);
	}

	return 0;
}
/*
  allocate a packet queue as a talloc child of mem_ctx and, when fd is
  not -1, bind it to that descriptor.  Returns NULL on allocation
  failure or when the descriptor could not be attached.
*/
struct ctdb_queue *ctdb_queue_setup(struct ctdb_context *ctdb,
				    TALLOC_CTX *mem_ctx, int fd, int alignment,
				    ctdb_queue_cb_fn_t callback,
				    void *private_data)
{
	struct ctdb_queue *q = talloc_zero(mem_ctx, struct ctdb_queue);
	CTDB_NO_MEMORY_NULL(ctdb, q);

	q->ctdb         = ctdb;
	q->fd           = fd;
	q->alignment    = alignment;
	q->private_data = private_data;
	q->callback     = callback;

	if (fd != -1 && ctdb_queue_set_fd(q, fd) != 0) {
		talloc_free(q);
		return NULL;
	}

	return q;
}

View File

@ -1,177 +0,0 @@
/*
ctdb ltdb code
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "tdb_wrap.h"
#include "../lib/util/dlinklist.h"
/*
  look up an attached database by name; returns its ctdb_db handle or
  NULL when no database on ctdb->db_list has that name
*/
struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *name)
{
	struct ctdb_db_context *db = ctdb->db_list;

	while (db != NULL && strcmp(name, db->db_name) != 0) {
		db = db->next;
	}

	return db;
}
/*
  return the lmaster for a key: the key's hash, reduced modulo the size
  of the vnn map, selects the entry
*/
uint32_t ctdb_lmaster(struct ctdb_context *ctdb, const TDB_DATA *key)
{
	const uint32_t slot = ctdb_hash(key) % ctdb->vnn_map->size;

	return ctdb->vnn_map->map[slot];
}
/*
  fill in the header for a record that has no ltdb header yet:
  rsn and lacount start at zero, and both dmaster and laccessor start
  out as the key's lmaster
*/
static void ltdb_initial_header(struct ctdb_db_context *ctdb_db,
				TDB_DATA key,
				struct ctdb_ltdb_header *header)
{
	/* initial dmaster is the lmaster */
	const uint32_t lmaster = ctdb_lmaster(ctdb_db->ctdb, &key);

	header->rsn       = 0;
	header->dmaster   = lmaster;
	header->laccessor = lmaster;
	header->lacount   = 0;
}
/*
  fetch a record from the ltdb, splitting it into header and body.

  When the stored record is at least one header long, *header receives
  the header and, if 'data' is non-NULL, the body is copied onto
  mem_ctx.

  When the record is missing or shorter than a header, an initial
  header is built, stored with an empty body and returned - unless no
  vnn map is set up (called from the client), in which case 'data' is
  zeroed, header->dmaster is set to (uint32_t)-1 and -1 is returned.
*/
int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db,
		    TDB_DATA key, struct ctdb_ltdb_header *header,
		    TALLOC_CTX *mem_ctx, TDB_DATA *data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA empty;
	TDB_DATA rec;

	rec = tdb_fetch(ctdb_db->ltdb->tdb, key);

	if (rec.dsize >= sizeof(*header)) {
		/* a full record: the header sits in front of the body */
		*header = *(struct ctdb_ltdb_header *)rec.dptr;

		if (data) {
			data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header);
			data->dptr = (unsigned char *)talloc_memdup(mem_ctx,
								    sizeof(struct ctdb_ltdb_header)+rec.dptr,
								    data->dsize);
		}

		free(rec.dptr);

		if (data) {
			CTDB_NO_MEMORY(ctdb, data->dptr);
		}
		return 0;
	}

	/* no usable record: return an initial header */
	if (rec.dptr) {
		free(rec.dptr);
	}

	if (ctdb->vnn_map == NULL) {
		/* called from the client */
		ZERO_STRUCTP(data);
		header->dmaster = (uint32_t)-1;
		return -1;
	}

	ltdb_initial_header(ctdb_db, key, header);
	ZERO_STRUCT(empty);
	if (data) {
		*data = empty;
	}
	ctdb_ltdb_store(ctdb_db, key, header, empty);
	return 0;
}
/*
  store a record in the ltdb: the header followed by the body is
  written under 'key', replacing any existing record.

  In torture mode the currently stored header is read back first and a
  message is logged if its rsn is higher than the one being written.

  Returns the result of tdb_store(), or -1 (via CTDB_NO_MEMORY) when
  the temporary record buffer cannot be allocated.
*/
int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key,
		    struct ctdb_ltdb_header *header, TDB_DATA data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	TDB_DATA rec;
	int ret;

	if (ctdb->flags & CTDB_FLAG_TORTURE) {
		struct ctdb_ltdb_header *h2;
		rec = tdb_fetch(ctdb_db->ltdb->tdb, key);
		h2 = (struct ctdb_ltdb_header *)rec.dptr;
		/* the stored record must hold a whole header, not just
		   pointer-size bytes, before h2->rsn may be read */
		if (rec.dptr && rec.dsize >= sizeof(*h2) && h2->rsn > header->rsn) {
			DEBUG(0,("RSN regression! %llu %llu\n",
				 (unsigned long long)h2->rsn, (unsigned long long)header->rsn));
		}
		if (rec.dptr) free(rec.dptr);
	}

	rec.dsize = sizeof(*header) + data.dsize;
	rec.dptr = (unsigned char *)talloc_size(ctdb, rec.dsize);
	CTDB_NO_MEMORY(ctdb, rec.dptr);

	memcpy(rec.dptr, header, sizeof(*header));
	/* an empty body may come with a NULL dptr (ctdb_ltdb_fetch passes a
	   zeroed TDB_DATA), which must not be handed to memcpy */
	if (data.dsize > 0) {
		memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize);
	}

	ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE);
	talloc_free(rec.dptr);

	return ret;
}
/*
  take the tdb chain lock covering 'key' in the ltdb;
  returns whatever tdb_chainlock() returns
*/
int ctdb_ltdb_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
{
	int ret = tdb_chainlock(ctdb_db->ltdb->tdb, key);

	return ret;
}
/*
  release the tdb chain lock covering 'key' in the ltdb; a failure is
  logged and the tdb_chainunlock() result is returned either way
*/
int ctdb_ltdb_unlock(struct ctdb_db_context *ctdb_db, TDB_DATA key)
{
	int ret = tdb_chainunlock(ctdb_db->ltdb->tdb, key);

	if (ret == 0) {
		return 0;
	}

	DEBUG(0,("tdb_chainunlock failed\n"));
	return ret;
}

View File

@ -1,111 +0,0 @@
/*
ctdb_message protocol code
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
see http://wiki.samba.org/index.php/Samba_%26_Clustering for
protocol design and packet details
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "../lib/util/dlinklist.h"
/*
  deliver a message to every registered handler whose srvid equals the
  message's srvid or is CTDB_SRVID_ALL.  Always returns 0.
*/
int ctdb_dispatch_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
{
	struct ctdb_message_list *ml;

	for (ml = ctdb->message_list; ml != NULL; ml = ml->next) {
		if (ml->srvid != srvid && ml->srvid != CTDB_SRVID_ALL) {
			continue;
		}
		ml->message_handler(ctdb, srvid, data, ml->message_private);
	}

	return 0;
}
/*
  called when a CTDB_REQ_MESSAGE packet comes in: wraps the packet's
  payload in a TDB_DATA and dispatches it under the packet's srvid
*/
void ctdb_request_message(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_req_message *msg = (struct ctdb_req_message *)hdr;
	TDB_DATA payload;

	payload.dsize = msg->datalen;
	payload.dptr = &msg->data[0];

	ctdb_dispatch_message(ctdb, msg->srvid, payload);
}
/*
when a client goes away, we need to remove its srvid handler from the list
*/
static int message_handler_destructor(struct ctdb_message_list *m)
{
DLIST_REMOVE(m->ctdb->message_list, m);
return 0;
}
/*
  register a handler for messages carrying the given srvid.  The list
  entry is allocated on mem_ctx and removes itself from the list when
  it is freed.  Returns 0, or -1 (via CTDB_NO_MEMORY) on allocation
  failure.
*/
int ctdb_register_message_handler(struct ctdb_context *ctdb,
				  TALLOC_CTX *mem_ctx,
				  uint64_t srvid,
				  ctdb_message_fn_t handler,
				  void *private_data)
{
	struct ctdb_message_list *entry = talloc(mem_ctx, struct ctdb_message_list);
	CTDB_NO_MEMORY(ctdb, entry);

	entry->ctdb            = ctdb;
	entry->srvid           = srvid;
	entry->message_handler = handler;
	entry->message_private = private_data;

	DLIST_ADD(ctdb->message_list, entry);

	talloc_set_destructor(entry, message_handler_destructor);

	return 0;
}
/*
setup handler for receipt of ctdb messages from ctdb_send_message()
*/
int ctdb_deregister_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
{
struct ctdb_message_list *m;
for (m=ctdb->message_list;m;m=m->next) {
if (m->srvid == srvid && m->message_private == private_data) {
talloc_free(m);
return 0;
}
}
return -1;
}

View File

@ -1,284 +0,0 @@
/*
ctdb utility code
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
int LogLevel;
/*
  return the message remembered by the last ctdb_set_error() call
  (the context's err_msg field, whatever it currently holds)
*/
const char *ctdb_errstr(struct ctdb_context *ctdb)
{
	const char *msg = ctdb->err_msg;

	return msg;
}
/*
  remember a printf-style error message on the context, replacing the
  previous one, and log it at debug level 0
*/
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
{
	va_list ap;
	char *msg;

	va_start(ap, fmt);
	talloc_free(ctdb->err_msg);
	msg = talloc_vasprintf(ctdb, fmt, ap);
	va_end(ap);

	ctdb->err_msg = msg;
	DEBUG(0,("ctdb error: %s\n", ctdb->err_msg));
}
/*
  report an unrecoverable internal error: log the message at debug
  level 0, then abort() the process.  Does not return.
*/
_NORETURN_ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
{
	DEBUG(0,("ctdb fatal error: %s\n", msg));

	abort();
}
/*
  fill in a ctdb_address from an address string.

  The string is copied verbatim onto mem_ctx as the address; no port is
  parsed out of it.  The port comes from the "ctdb"/"tcp" entry of the
  services database, falling back to CTDB_PORT when there is none.

  Returns 0 on success, -1 if the address string could not be copied.
*/
int ctdb_parse_address(struct ctdb_context *ctdb,
		       TALLOC_CTX *mem_ctx, const char *str,
		       struct ctdb_address *address)
{
	struct servent *se;

	setservent(0);
	se = getservbyname("ctdb", "tcp");
	endservent();

	address->address = talloc_strdup(mem_ctx, str);
	if (address->address == NULL) {
		/* do not report success with a NULL address: later users
		   such as ctdb_same_address() pass it to strcmp() */
		return -1;
	}

	if (se == NULL) {
		address->port = CTDB_PORT;
	} else {
		address->port = ntohs(se->s_port);
	}
	return 0;
}
/*
  two addresses are the same when their address strings compare equal
  and their ports match
*/
bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2)
{
	if (strcmp(a1->address, a2->address) != 0) {
		return false;
	}

	return a1->port == a2->port;
}
/*
  hash function for mapping data to a VNN - taken from tdb.
  The value is seeded from the key size, each key byte is added in
  shifted left by (i*5 % 24) bits, and the total is scrambled with a
  final multiply-and-add.
*/
uint32_t ctdb_hash(const TDB_DATA *key)
{
	/* seed from the key size */
	uint32_t value = 0x238F13AF * key->dsize;
	uint32_t i;

	for (i = 0; i < key->dsize; i++) {
		value += (key->dptr[i] << (i*5 % 24));
	}

	return (1103515243 * value + 12345);
}
/*
  a type checking variant of idr_find: returns the pointer stored under
  'id' only if its talloc name matches 'type'; a mismatch is logged
  together with 'location' and NULL is returned
*/
static void *_idr_find_type(struct idr_context *idp, int id, const char *type, const char *location)
{
	void *found = idr_find(idp, id);

	if (found == NULL) {
		return NULL;
	}

	if (talloc_check_name(found, type) != NULL) {
		return found;
	}

	DEBUG(0,("%s idr_find_type expected type %s but got %s\n",
		 location, type, talloc_get_name(found)));
	return NULL;
}
/*
  update a max latency number: *latency is raised to the time elapsed
  since 't' whenever that elapsed time is larger
*/
void ctdb_latency(double *latency, struct timeval t)
{
	const double elapsed = timeval_elapsed(&t);

	if (!(elapsed > *latency)) {
		return;
	}

	*latency = elapsed;
}
/*
  allocate a request id for 'state'.  The low 16 bits come from a
  running counter on the context, the high 16 bits are the idr id under
  which 'state' has been registered.
*/
uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state)
{
	uint32_t counter_bits = ctdb->idr_cnt++ & 0xFFFF;
	uint32_t idr_bits = (idr_get_new(ctdb->idr, state, 0xFFFF)<<16);

	return counter_bits | idr_bits;
}
/*
  look up the state registered under a request id (the idr id lives in
  the high 16 bits) and check its talloc type; a failed lookup is
  logged and NULL is returned
*/
void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location)
{
	void *state = _idr_find_type(ctdb->idr, (reqid>>16)&0xFFFF, type, location);

	if (state != NULL) {
		return state;
	}

	DEBUG(0, ("Could not find idr:%u\n",reqid));
	return NULL;
}
/*
  drop the idr entry behind a request id (high 16 bits); removing an
  id that is not registered is only logged
*/
void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid)
{
	if (idr_remove(ctdb->idr, (reqid>>16)&0xFFFF) == 0) {
		return;
	}

	DEBUG(0, ("Removing idr that does not exist\n"));
}
/*
  form a ctdb_rec_data record from a key/data pair: a single talloc
  allocation on mem_ctx holding the fixed fields followed by the key
  bytes and then the data bytes.  Returns NULL if the allocation fails.
*/
struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid, TDB_DATA key, TDB_DATA data)
{
	const size_t total = offsetof(struct ctdb_rec_data, data) + key.dsize + data.dsize;
	struct ctdb_rec_data *rec = (struct ctdb_rec_data *)talloc_size(mem_ctx, total);

	if (rec != NULL) {
		rec->length  = total;
		rec->reqid   = reqid;
		rec->keylen  = key.dsize;
		rec->datalen = data.dsize;
		memcpy(&rec->data[0], key.dptr, key.dsize);
		memcpy(&rec->data[key.dsize], data.dptr, data.dsize);
	}

	return rec;
}
#if HAVE_SCHED_H
#include <sched.h>
#endif
/*
  if possible, make this task real time: the current scheduler
  parameters are saved on the context, then SCHED_FIFO with priority 1
  is requested.  Compiles to nothing without HAVE_SCHED_SETSCHEDULER.
*/
void ctdb_set_scheduler(struct ctdb_context *ctdb)
{
#if HAVE_SCHED_SETSCHEDULER
	struct sched_param p;
	struct sched_param *saved;

	if (ctdb->saved_scheduler_param == NULL) {
		ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p));
	}
	saved = (struct sched_param *)ctdb->saved_scheduler_param;

	if (sched_getparam(0, saved) == -1) {
		DEBUG(0,("Unable to get old scheduler params\n"));
		return;
	}

	p = *saved;
	p.sched_priority = 1;

	if (sched_setscheduler(0, SCHED_FIFO, &p) != -1) {
		DEBUG(0,("Set scheduler to SCHED_FIFO\n"));
		return;
	}

	DEBUG(0,("Unable to set scheduler to SCHED_FIFO (%s)\n",
		 strerror(errno)));
#endif
}
/*
  switch back to SCHED_OTHER using the parameters saved by
  ctdb_set_scheduler(); missing saved parameters or a failing
  sched_setscheduler() are fatal.  Compiles to nothing without
  HAVE_SCHED_SETSCHEDULER.
*/
void ctdb_restore_scheduler(struct ctdb_context *ctdb)
{
#if HAVE_SCHED_SETSCHEDULER
	struct sched_param *saved = (struct sched_param *)ctdb->saved_scheduler_param;

	if (saved == NULL) {
		ctdb_fatal(ctdb, "No saved scheduler parameters\n");
	}

	if (sched_setscheduler(0, SCHED_OTHER, saved) == -1) {
		ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n");
	}
#endif
}
/*
  add O_NONBLOCK to the file status flags of fd.
  Does nothing if the current flags cannot be read (e.g. bad fd).
*/
void set_nonblocking(int fd)
{
	int v = fcntl(fd, F_GETFL, 0);

	if (v == -1) {
		/* do not feed the error value back in as a flag set */
		return;
	}

	fcntl(fd, F_SETFL, v | O_NONBLOCK);
}
/*
  add FD_CLOEXEC to the descriptor flags of fd.
  Does nothing if the current flags cannot be read (e.g. bad fd).
*/
void set_close_on_exec(int fd)
{
	int v = fcntl(fd, F_GETFD, 0);

	if (v == -1) {
		/* do not feed the error value back in as a flag set */
		return;
	}

	fcntl(fd, F_SETFD, v | FD_CLOEXEC);
}
/*
  parse a "ip:port" pair into an IPv4 socket address.

  's' must be a dotted IPv4 address of at most 15 characters, a ':'
  and a decimal port in the range 0..65535 with nothing after it.
  Returns true on success.  On failure false is returned; sin_family
  (and possibly sin_port) of *ip may already have been written.
*/
bool parse_ip_port(const char *s, struct sockaddr_in *ip)
{
	const char *p;
	char *endp = NULL;
	unsigned long port;
	char buf[16];
	size_t iplen;

	ip->sin_family = AF_INET;

	p = strchr(s, ':');
	if (p == NULL) {
		return false;
	}

	iplen = (size_t)(p - s);
	if (iplen > sizeof(buf) - 1) {
		return false;
	}

	/* the port must start with a digit: this rejects an empty port as
	   well as the sign and leading whitespace strtoul() would accept */
	if (p[1] < '0' || p[1] > '9') {
		return false;
	}

	port = strtoul(p+1, &endp, 10);
	if (endp == NULL || *endp != 0) {
		/* trailing garbage */
		return false;
	}
	if (port > 65535) {
		/* would otherwise wrap silently when narrowed to 16 bits */
		return false;
	}
	ip->sin_port = htons((uint16_t)port);

	/* copy out the address part so it can be NUL terminated */
	memcpy(buf, s, iplen);
	buf[iplen] = '\0';

	if (inet_aton(buf, &ip->sin_addr) == 0) {
		return false;
	}

	return true;
}

View File

@ -1,385 +0,0 @@
/*
ctdb recovery code
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/events/events.h"
#include <net/ethernet.h>
#include <net/if_arp.h>
/*
  send gratuitous arp reply after we have taken over an ip address

  saddr is the address we are trying to claim
  iface is the interface name we will be using to claim the address

  Two 64 byte frames are broadcast on the interface: an ARP request
  followed by an unsolicited ARP reply, both announcing saddr with the
  interface's own hardware address.

  Returns 0 on success (and for a loopback interface, where nothing is
  sent), -1 on any failure.
*/
int ctdb_sys_send_arp(const struct sockaddr_in *saddr, const char *iface)
{
	int s, ret;
	struct sockaddr sa;
	struct ether_header *eh;
	struct arphdr *ah;
	struct ifreq if_hwaddr;
	unsigned char buffer[64]; /*minimum eth frame size */
	char *ptr;

	/* for now, we only handle AF_INET addresses */
	if (saddr->sin_family != AF_INET) {
		DEBUG(0,(__location__ " not an ipv4 address (family is %u)\n", saddr->sin_family));
		return -1;
	}

	/* a name that does not fit ifr_name would overflow the ifreq below */
	if (strlen(iface) >= sizeof(if_hwaddr.ifr_name)) {
		errno = EINVAL;
		DEBUG(0,(__location__ " interface name is too long\n"));
		return -1;
	}

	s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_ARP));
	if (s == -1){
		DEBUG(0,(__location__ " failed to open raw socket\n"));
		return -1;
	}

	/* get the mac address */
	memset(&if_hwaddr, 0, sizeof(if_hwaddr));
	strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1);
	ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
	if ( ret < 0 ) {
		close(s);
		DEBUG(0,(__location__ " ioctl failed\n"));
		return -1;
	}
	if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) {
		DEBUG(3,("Ignoring loopback arp request\n"));
		close(s);
		return 0;
	}
	/* SIOCGIFHWADDR reports an ARPHRD_* hardware type here; the
	   original compared against AF_LOCAL, which has the same value
	   (1) as ARPHRD_ETHER on Linux */
	if (if_hwaddr.ifr_hwaddr.sa_family != ARPHRD_ETHER) {
		close(s);
		errno = EINVAL;
		DEBUG(0,(__location__ " not an ethernet address family (0x%x)\n",
			 if_hwaddr.ifr_hwaddr.sa_family));
		return -1;
	}

	/* ethernet header: broadcast destination, our mac as source */
	memset(buffer, 0 , 64);
	eh = (struct ether_header *)buffer;
	memset(eh->ether_dhost, 0xff, ETH_ALEN);
	memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
	eh->ether_type = htons(ETHERTYPE_ARP);

	ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
	ah->ar_hrd = htons(ARPHRD_ETHER);
	ah->ar_pro = htons(ETH_P_IP);
	ah->ar_hln = ETH_ALEN;
	ah->ar_pln = 4;

	/* send a gratuitous arp */
	ah->ar_op = htons(ARPOP_REQUEST);
	ptr = (char *)&ah[1];
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);	/* sender hw address */
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);			/* sender ip */
	ptr+=4;
	memset(ptr, 0, ETH_ALEN);				/* target hw address */
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);			/* target ip */
	ptr+=4;

	/* start from a zeroed address so that no uninitialised stack
	   bytes are handed to the kernel */
	memset(&sa, 0, sizeof(sa));
	strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
	ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
	if (ret < 0 ){
		close(s);
		DEBUG(0,(__location__ " failed sendto\n"));
		return -1;
	}

	/* send unsolicited arp reply broadcast */
	ah->ar_op = htons(ARPOP_REPLY);
	ptr = (char *)&ah[1];
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);	/* sender hw address */
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);			/* sender ip */
	ptr+=4;
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);	/* target hw address */
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);			/* target ip */
	ptr+=4;

	memset(&sa, 0, sizeof(sa));
	strncpy(sa.sa_data, iface, sizeof(sa.sa_data));
	ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
	if (ret < 0 ){
		/* the socket must be closed on this path as well */
		close(s);
		DEBUG(0,(__location__ " failed sendto\n"));
		return -1;
	}

	close(s);
	return 0;
}
/*
  uint16 checksum for n bytes

  Adds up the buffer as a sequence of 16-bit words in network byte
  order and returns the unfolded 32-bit sum.  An odd trailing byte is
  counted as the high-order byte of a final word whose low byte is
  zero.
*/
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	uint32_t sum = 0;

	for (; n >= 2; n -= 2, data++) {
		sum += (uint32_t)ntohs(*data);
	}

	if (n == 1) {
		/* shift explicitly rather than going through ntohs(), which
		   leaves the byte in the low half on big-endian hosts */
		sum += (uint32_t)(*(uint8_t *)data) << 8;
	}

	return sum;
}
/*
  simple TCP checksum - assumes data is multiple of 2 bytes long

  Sums the TCP segment together with the pseudo-header fields taken
  from the IP header (source address, destination address, protocol
  and segment length), folds the carries back in twice and returns the
  one's complement in network byte order.  A result of zero is
  returned as 0xFFFF.
*/
static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
{
	uint32_t acc;
	uint16_t folded;

	acc  = uint16_checksum(data, n);
	acc += uint16_checksum((uint16_t *)&ip->saddr, sizeof(ip->saddr));
	acc += uint16_checksum((uint16_t *)&ip->daddr, sizeof(ip->daddr));
	acc += ip->protocol + n;

	/* fold the carries into the low 16 bits */
	acc = (acc >> 16) + (acc & 0xFFFF);
	acc = (acc >> 16) + (acc & 0xFFFF);

	folded = htons(acc);
	folded = ~folded;

	return (folded == 0) ? 0xFFFF : folded;
}
/*
  Send tcp segment from the specified IP/port to the specified
  destination IP/port.

  This is used to trigger the receiving host into sending its own ACK,
  which should trigger early detection of TCP reset by the client
  after IP takeover

  This can also be used to send RST segments (if rst is true) and also
  if correct seq and ack numbers are provided.

  seq and ack are copied into the TCP header unchanged; the caller in
  this file passes values lifted straight from a received header.

  Returns 0 on success, -1 on failure.
*/
int ctdb_sys_send_tcp(const struct sockaddr_in *dest,
		      const struct sockaddr_in *src,
		      uint32_t seq, uint32_t ack, int rst)
{
	int s, ret;
	uint32_t one = 1;
	struct {
		struct iphdr ip;
		struct tcphdr tcp;
	} pkt;

	/* for now, we only handle AF_INET addresses */
	if (src->sin_family != AF_INET || dest->sin_family != AF_INET) {
		DEBUG(0,(__location__ " not an ipv4 address\n"));
		return -1;
	}

	/* the protocol argument of socket() is a plain protocol number in
	   host byte order; it must not go through htons() */
	s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	if (s == -1) {
		DEBUG(0,(__location__ " failed to open raw socket (%s)\n",
			 strerror(errno)));
		return -1;
	}

	/* we supply the IP header ourselves */
	ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
	if (ret != 0) {
		DEBUG(0,(__location__ " failed to setup IP headers (%s)\n",
			 strerror(errno)));
		close(s);
		return -1;
	}

	ZERO_STRUCT(pkt);
	pkt.ip.version  = 4;
	pkt.ip.ihl      = sizeof(pkt.ip)/4;
	pkt.ip.tot_len  = htons(sizeof(pkt));
	pkt.ip.ttl      = 255;
	pkt.ip.protocol = IPPROTO_TCP;
	pkt.ip.saddr    = src->sin_addr.s_addr;
	pkt.ip.daddr    = dest->sin_addr.s_addr;
	pkt.ip.check    = 0;

	pkt.tcp.source   = src->sin_port;
	pkt.tcp.dest     = dest->sin_port;
	pkt.tcp.seq      = seq;
	pkt.tcp.ack_seq  = ack;
	pkt.tcp.ack      = 1;
	if (rst) {
		pkt.tcp.rst      = 1;
	}
	pkt.tcp.doff     = sizeof(pkt.tcp)/4;
	pkt.tcp.window   = htons(1234);
	pkt.tcp.check    = tcp_checksum((uint16_t *)&pkt.tcp, sizeof(pkt.tcp), &pkt.ip);

	ret = sendto(s, &pkt, sizeof(pkt), 0,
		     (const struct sockaddr *)dest, sizeof(*dest));
	if (ret != sizeof(pkt)) {
		DEBUG(0,(__location__ " failed sendto (%s)\n", strerror(errno)));
		close(s);
		return -1;
	}

	close(s);
	return 0;
}
/*
  see if we currently have an interface with the given IP

  we try to bind to it, and if that fails then we don't have that IP
  on an interface

  A string that is not a valid IPv4 address yields false.
*/
bool ctdb_sys_have_ip(const char *ip)
{
	struct sockaddr_in sin;
	int s;
	int ret;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port   = 0;
	if (inet_aton(ip, &sin.sin_addr) == 0) {
		/* not an address: without this check bind() would be
		   tried on whatever the stack happened to contain */
		return false;
	}

	s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (s == -1) {
		return false;
	}

	ret = bind(s, (struct sockaddr *)&sin, sizeof(sin));
	close(s);

	return ret == 0;
}
/*
  timed-event handler used by ctdb_sys_kill_tcp(): sets the uint32_t
  that 'p' points at to 1 to signal that the wait has expired
*/
static void ctdb_wait_handler(struct event_context *ev, struct timed_event *te,
			      struct timeval yt, void *p)
{
	uint32_t *expired = (uint32_t *)p;

	*expired = 1;
}
/* This function is used to kill (RST) the specified tcp connection.

   A tickle ACK is sent from src to dst, then all traffic is captured
   for up to one second while waiting for dst's answer; the seq/ack
   numbers of that answer are used to send a RST that dst will accept.

   This function is not asynchronous and will block until the operation
   was successful or it times out.

   Returns 0 once the RST has been sent, -1 on error or timeout.
*/
int ctdb_sys_kill_tcp(struct event_context *ev,
		      const struct sockaddr_in *dst,
		      const struct sockaddr_in *src)
{
	int s, ret;
	uint32_t timedout;
	TALLOC_CTX *tmp_ctx;
#define RCVPKTSIZE 100
	char pkt[RCVPKTSIZE];
	struct ether_header *eth;
	struct iphdr *ip;
	struct tcphdr *tcp;

	/* Open a socket to capture all traffic */
	s=socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (s == -1){
		DEBUG(0,(__location__ " failed to open raw socket\n"));
		return -1;
	}

	/* allocated only after the socket exists, so the early return
	   above has nothing to free */
	tmp_ctx = talloc_new(NULL);
	if (tmp_ctx == NULL) {
		DEBUG(0,(__location__ " out of memory\n"));
		close(s);
		return -1;
	}

	/* We wait for up to 1 second for the ACK coming back */
	timedout = 0;
	event_add_timed(ev, tmp_ctx, timeval_current_ofs(1, 0), ctdb_wait_handler, &timedout);

	/* Send a tickle ack to probe what the real seq/ack numbers are */
	ctdb_sys_send_tcp(dst, src, 0, 0, 0);

	/* Wait until we either time out or we succeeds in sending the RST */
	/* NOTE(review): the capture socket is not switched to non-blocking
	   here, so recv() presumably blocks until some frame arrives - confirm */
	while (timedout==0) {
		event_loop_once(ev);

		ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC);
		/* test the sign first: compared directly against a size_t a
		   recv() error (-1) would look like a huge length */
		if (ret < 0 || (size_t)ret < sizeof(*eth)+sizeof(*ip)) {
			continue;
		}

		/* Ethernet */
		eth = (struct ether_header *)pkt;
		/* We only want IP packets */
		if (ntohs(eth->ether_type) != ETHERTYPE_IP) {
			continue;
		}

		/* IP */
		ip = (struct iphdr *)(eth+1);
		/* We only want IPv4 packets */
		if (ip->version != 4) {
			continue;
		}
		/* Dont look at fragments */
		if ((ntohs(ip->frag_off)&0x1fff) != 0) {
			continue;
		}
		/* we only want TCP */
		if (ip->protocol != IPPROTO_TCP) {
			continue;
		}

		/* We only want packets sent from the guy we tickled */
		if (ip->saddr != dst->sin_addr.s_addr) {
			continue;
		}
		/* We only want packets sent to us */
		if (ip->daddr != src->sin_addr.s_addr) {
			continue;
		}

		/* make sure its not a short packet */
		if (offsetof(struct tcphdr, ack_seq) + 4 +
		    (ip->ihl*4) + sizeof(*eth) > (size_t)ret) {
			continue;
		}

		/* TCP */
		tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip);

		/* We only want replies from the port we tickled */
		if (tcp->source != dst->sin_port) {
			continue;
		}
		if (tcp->dest != src->sin_port) {
			continue;
		}

		/* answer with a RST carrying the peer's own numbers, swapped */
		ctdb_sys_send_tcp(dst, src, tcp->ack_seq, tcp->seq, 1);

		close(s);
		talloc_free(tmp_ctx);

		return 0;
	}

	close(s);
	talloc_free(tmp_ctx);

	DEBUG(0,(__location__ " timedout waiting for tickle ack reply\n"));

	return -1;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,756 +0,0 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* Whether strndup is broken */
#undef BROKEN_STRNDUP
/* Whether strnlen is broken */
#undef BROKEN_STRNLEN
/* Define to 1 if you have the <acl/libacl.h> header file. */
#undef HAVE_ACL_LIBACL_H
/* Define to 1 if you have the <alloca.h> header file. */
#undef HAVE_ALLOCA_H
/* Define to 1 if you have the <arpa/inet.h> header file. */
#undef HAVE_ARPA_INET_H
/* Define to 1 if you have the `asprintf' function. */
#undef HAVE_ASPRINTF
/* Whether the bool type is available */
#undef HAVE_BOOL
/* Define to 1 if you have the `bzero' function. */
#undef HAVE_BZERO
/* Whether there is a C99 compliant vsnprintf */
#undef HAVE_C99_VSNPRINTF
/* Define to 1 if you have the `chroot' function. */
#undef HAVE_CHROOT
/* Define to 1 if you have the `chsize' function. */
#undef HAVE_CHSIZE
/* Whether or not we have comparison_fn_t */
#undef HAVE_COMPARISON_FN_T
/* Define to 1 if you have the <compat.h> header file. */
#undef HAVE_COMPAT_H
/* Define to 1 if you have the <ctype.h> header file. */
#undef HAVE_CTYPE_H
/* Define to 1 if you have the declaration of `asprintf', and to 0 if you
don't. */
#undef HAVE_DECL_ASPRINTF
/* Define to 1 if you have the declaration of `snprintf', and to 0 if you
don't. */
#undef HAVE_DECL_SNPRINTF
/* Define to 1 if you have the declaration of `vasprintf', and to 0 if you
don't. */
#undef HAVE_DECL_VASPRINTF
/* Define to 1 if you have the declaration of `vsnprintf', and to 0 if you
don't. */
#undef HAVE_DECL_VSNPRINTF
/* Define to 1 if you have the <direct.h> header file. */
#undef HAVE_DIRECT_H
/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
*/
#undef HAVE_DIRENT_H
/* Define to 1 if you have the `dlclose' function. */
#undef HAVE_DLCLOSE
/* Define to 1 if you have the `dlerror' function. */
#undef HAVE_DLERROR
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `dlopen' function. */
#undef HAVE_DLOPEN
/* Define to 1 if you have the `dlsym' function. */
#undef HAVE_DLSYM
/* Define to 1 if you have the `epoll_create' function. */
#undef HAVE_EPOLL_CREATE
/* Whether errno is declared */
#undef HAVE_ERRNO_DECL
/* Whether epoll available */
#undef HAVE_EVENTS_EPOLL
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
/* Define to 1 if you have the <float.h> header file. */
#undef HAVE_FLOAT_H
/* Define to 1 if you have the <fnmatch.h> header file. */
#undef HAVE_FNMATCH_H
/* Define to 1 if you have the `ftruncate' function. */
#undef HAVE_FTRUNCATE
/* Whether there is a __FUNCTION__ macro */
#undef HAVE_FUNCTION_MACRO
/* Define to 1 if you have the `getdents' function. */
#undef HAVE_GETDENTS
/* Define to 1 if you have the `getdirentries' function. */
#undef HAVE_GETDIRENTRIES
/* Define to 1 if you have the <getopt.h> header file. */
#undef HAVE_GETOPT_H
/* Define to 1 if you have the `getpagesize' function. */
#undef HAVE_GETPAGESIZE
/* Define to 1 if you have the `getpgrp' function. */
#undef HAVE_GETPGRP
/* Define to 1 if you have the <grp.h> header file. */
#undef HAVE_GRP_H
/* Whether the compiler supports immediate structures */
#undef HAVE_IMMEDIATE_STRUCTURES
/* Define to 1 if you have the <infiniband/verbs.h> header file. */
#undef HAVE_INFINIBAND_VERBS_H
/* Define to 1 if you have the `initgroups' function. */
#undef HAVE_INITGROUPS
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `ibverbs' library (-libverbs). */
#undef HAVE_LIBIBVERBS
/* Define to 1 if you have the `rdmacm' library (-lrdmacm). */
#undef HAVE_LIBRDMACM
/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H
/* Define to 1 if you have the <locale.h> header file. */
#undef HAVE_LOCALE_H
/* Define to 1 if the system has the type `long long'. */
#undef HAVE_LONG_LONG
/* Define to 1 if you have the `lstat' function. */
#undef HAVE_LSTAT
/* Define to 1 if you have the `memcpy' function. */
#undef HAVE_MEMCPY
/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `memset' function. */
#undef HAVE_MEMSET
/* Define if target mkdir supports mode option */
#undef HAVE_MKDIR_MODE
/* Define to 1 if you have the `mkdtemp' function. */
#undef HAVE_MKDTEMP
/* Define to 1 if you have the `mktime' function. */
#undef HAVE_MKTIME
/* Define to 1 if you have the `mmap' function. */
#undef HAVE_MMAP
/* Define to 1 if you have the <mntent.h> header file. */
#undef HAVE_MNTENT_H
/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
#undef HAVE_NDIR_H
/* Define to 1 if you have the <netdb.h> header file. */
#undef HAVE_NETDB_H
/* Define to 1 if you have the <netinet/in.h> header file. */
#undef HAVE_NETINET_IN_H
/* Define to 1 if you have the <netinet/in_ip.h> header file. */
#undef HAVE_NETINET_IN_IP_H
/* Define to 1 if you have the <netinet/in_systm.h> header file. */
#undef HAVE_NETINET_IN_SYSTM_H
/* Define to 1 if you have the <netinet/ip.h> header file. */
#undef HAVE_NETINET_IP_H
/* Define to 1 if you have the <netinet/tcp.h> header file. */
#undef HAVE_NETINET_TCP_H
/* usability of net/if.h */
#undef HAVE_NET_IF_H
/* Whether the open(2) accepts O_DIRECT */
#undef HAVE_OPEN_O_DIRECT
/* Define to 1 if you have the `pipe' function. */
#undef HAVE_PIPE
/* Define to 1 if you have the <popt.h> header file. */
#undef HAVE_POPT_H
/* Define to 1 if you have the `pread' function. */
#undef HAVE_PREAD
/* Whether pread() is available */
#undef HAVE_PREAD_DECL
/* Define to 1 if you have the `printf' function. */
#undef HAVE_PRINTF
/* Define to 1 if you have the <pwd.h> header file. */
#undef HAVE_PWD_H
/* Define to 1 if you have the `pwrite' function. */
#undef HAVE_PWRITE
/* Whether pwrite() is available */
#undef HAVE_PWRITE_DECL
/* Define to 1 if you have the `rand' function. */
#undef HAVE_RAND
/* Define to 1 if you have the `random' function. */
#undef HAVE_RANDOM
/* Define to 1 if you have the <rdma/rdma_cma.h> header file. */
#undef HAVE_RDMA_RDMA_CMA_H
/* Define to 1 if you have the `rename' function. */
#undef HAVE_RENAME
/* Define to 1 if you have the <sched.h> header file. */
#undef HAVE_SCHED_H
/* Define to 1 if you have the `sched_setscheduler' function. */
#undef HAVE_SCHED_SETSCHEDULER
/* Whether mkstemp is secure */
#undef HAVE_SECURE_MKSTEMP
/* Define to 1 if you have the `setbuffer' function. */
#undef HAVE_SETBUFFER
/* Define to 1 if you have the `setegid' function. */
#undef HAVE_SETEGID
/* Define to 1 if you have the `setenv' function. */
#undef HAVE_SETENV
/* Whether setenv() is available */
#undef HAVE_SETENV_DECL
/* Define to 1 if you have the `seteuid' function. */
#undef HAVE_SETEUID
/* Define to 1 if you have the <setjmp.h> header file. */
#undef HAVE_SETJMP_H
/* Define to 1 if you have the `setlinebuf' function. */
#undef HAVE_SETLINEBUF
/* Define to 1 if you have the `setresgid' function. */
#undef HAVE_SETRESGID
/* Whether setresgid() is available */
#undef HAVE_SETRESGID_DECL
/* Define to 1 if you have the `setresuid' function. */
#undef HAVE_SETRESUID
/* Whether setresuid() is available */
#undef HAVE_SETRESUID_DECL
/* Define to 1 if you have the <shadow.h> header file. */
#undef HAVE_SHADOW_H
/* Whether we have the sig_atomic_t variable type */
#undef HAVE_SIG_ATOMIC_T_TYPE
/* Define to 1 if you have the `snprintf' function. */
#undef HAVE_SNPRINTF
/* Define to 1 if you have the `socketpair' function. */
#undef HAVE_SOCKETPAIR
/* Whether the sockaddr_in struct has a sin_len property */
#undef HAVE_SOCK_SIN_LEN
/* Define to 1 if you have the `srand' function. */
#undef HAVE_SRAND
/* Define to 1 if you have the `srandom' function. */
#undef HAVE_SRANDOM
/* Define to 1 if you have the <standards.h> header file. */
#undef HAVE_STANDARDS_H
/* Define to 1 if you have the <stdarg.h> header file. */
#undef HAVE_STDARG_H
/* Define to 1 if you have the <stdbool.h> header file. */
#undef HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdio.h> header file. */
#undef HAVE_STDIO_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strcasestr' function. */
#undef HAVE_STRCASESTR
/* Define to 1 if you have the `strdup' function. */
#undef HAVE_STRDUP
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the `strftime' function. */
#undef HAVE_STRFTIME
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the `strlcat' function. */
#undef HAVE_STRLCAT
/* Define to 1 if you have the `strlcpy' function. */
#undef HAVE_STRLCPY
/* Define to 1 if you have the `strndup' function. */
#undef HAVE_STRNDUP
/* Define to 1 if you have the `strnlen' function. */
#undef HAVE_STRNLEN
/* Define to 1 if you have the `strtok_r' function. */
#undef HAVE_STRTOK_R
/* Define to 1 if you have the `strtoll' function. */
#undef HAVE_STRTOLL
/* Define to 1 if you have the `strtoq' function. */
#undef HAVE_STRTOQ
/* Define to 1 if you have the `strtoull' function. */
#undef HAVE_STRTOULL
/* Define to 1 if you have the `strtouq' function. */
#undef HAVE_STRTOUQ
/* Define to 1 if `st_rdev' is member of `struct stat'. */
#undef HAVE_STRUCT_STAT_ST_RDEV
/* Define to 1 if your `struct stat' has `st_rdev'. Deprecated, use
`HAVE_STRUCT_STAT_ST_RDEV' instead. */
#undef HAVE_ST_RDEV
/* Define to 1 if you have the `syslog' function. */
#undef HAVE_SYSLOG
/* Define to 1 if you have the <syslog.h> header file. */
#undef HAVE_SYSLOG_H
/* Define to 1 if you have the <sys/acl.h> header file. */
#undef HAVE_SYS_ACL_H
/* Define to 1 if you have the <sys/capability.h> header file. */
#undef HAVE_SYS_CAPABILITY_H
/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
*/
#undef HAVE_SYS_DIR_H
/* Define to 1 if you have the <sys/epoll.h> header file. */
#undef HAVE_SYS_EPOLL_H
/* Define to 1 if you have the <sys/fcntl.h> header file. */
#undef HAVE_SYS_FCNTL_H
/* Define to 1 if you have the <sys/filio.h> header file. */
#undef HAVE_SYS_FILIO_H
/* Define to 1 if you have the <sys/filsys.h> header file. */
#undef HAVE_SYS_FILSYS_H
/* Define to 1 if you have the <sys/fs/s5param.h> header file. */
#undef HAVE_SYS_FS_S5PARAM_H
/* Define to 1 if you have the <sys/id.h> header file. */
#undef HAVE_SYS_ID_H
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/ipc.h> header file. */
#undef HAVE_SYS_IPC_H
/* Define to 1 if you have the <sys/mman.h> header file. */
#undef HAVE_SYS_MMAN_H
/* Define to 1 if you have the <sys/mode.h> header file. */
#undef HAVE_SYS_MODE_H
/* Define to 1 if you have the <sys/mount.h> header file. */
#undef HAVE_SYS_MOUNT_H
/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
*/
#undef HAVE_SYS_NDIR_H
/* Define to 1 if you have the <sys/param.h> header file. */
#undef HAVE_SYS_PARAM_H
/* Define to 1 if you have the <sys/priv.h> header file. */
#undef HAVE_SYS_PRIV_H
/* Define to 1 if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H
/* Define to 1 if you have the <sys/security.h> header file. */
#undef HAVE_SYS_SECURITY_H
/* Define to 1 if you have the <sys/select.h> header file. */
#undef HAVE_SYS_SELECT_H
/* Define to 1 if you have the <sys/shm.h> header file. */
#undef HAVE_SYS_SHM_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#undef HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/sockio.h> header file. */
#undef HAVE_SYS_SOCKIO_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/syslog.h> header file. */
#undef HAVE_SYS_SYSLOG_H
/* Define to 1 if you have the <sys/termio.h> header file. */
#undef HAVE_SYS_TERMIO_H
/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <sys/un.h> header file. */
#undef HAVE_SYS_UN_H
/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
#undef HAVE_SYS_WAIT_H
/* Define to 1 if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
/* Define to 1 if you have the <termio.h> header file. */
#undef HAVE_TERMIO_H
/* Define to 1 if you have the `timegm' function. */
#undef HAVE_TIMEGM
/* Define to 1 if you have the <time.h> header file. */
#undef HAVE_TIME_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the `unsetenv' function. */
#undef HAVE_UNSETENV
/* Define to 1 if you have the `usleep' function. */
#undef HAVE_USLEEP
/* Define to 1 if you have the `utime' function. */
#undef HAVE_UTIME
/* Define to 1 if you have the <utime.h> header file. */
#undef HAVE_UTIME_H
/* Define to 1 if you have the <vararg.h> header file. */
#undef HAVE_VARARG_H
/* Define to 1 if you have the `vasprintf' function. */
#undef HAVE_VASPRINTF
/* Whether va_copy() is available */
#undef HAVE_VA_COPY
/* Whether the C compiler understands volatile */
#undef HAVE_VOLATILE
/* Define to 1 if you have the `vsnprintf' function. */
#undef HAVE_VSNPRINTF
/* Define to 1 if you have the `vsyslog' function. */
#undef HAVE_VSYSLOG
/* Define to 1 if you have the `waitpid' function. */
#undef HAVE_WAITPID
/* Define to 1 if you have the <windows.h> header file. */
#undef HAVE_WINDOWS_H
/* Define to 1 if you have the <winsock2.h> header file. */
#undef HAVE_WINSOCK2_H
/* Define to 1 if you have the <ws2tcpip.h> header file. */
#undef HAVE_WS2TCPIP_H
/* Whether the _Bool type is available */
#undef HAVE__Bool
/* Whether the __VA_ARGS__ macro is available */
#undef HAVE__VA_ARGS__MACRO
/* Define to 1 if you have the `__strtoll' function. */
#undef HAVE___STRTOLL
/* Define to 1 if you have the `__strtoull' function. */
#undef HAVE___STRTOULL
/* Whether __va_copy() is available */
#undef HAVE___VA_COPY
/* Whether there is a __func__ macro */
#undef HAVE_func_MACRO
/* Whether MMAP is broken */
#undef MMAP_BLACKLIST
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Whether getpass should be replaced */
#undef REPLACE_GETPASS
/* Whether inet_ntoa should be replaced */
#undef REPLACE_INET_NTOA
/* replace readdir */
#undef REPLACE_READDIR
/* replace readdir using getdents() */
#undef REPLACE_READDIR_GETDENTS
/* replace readdir using getdirentries() */
#undef REPLACE_READDIR_GETDIRENTRIES
/* Whether strptime should be replaced */
#undef REPLACE_STRPTIME
/* Define as the return type of signal handlers (`int' or `void'). */
#undef RETSIGTYPE
/* Whether seekdir returns an int */
#undef SEEKDIR_RETURNS_INT
/* The size of `char', as computed by sizeof. */
#undef SIZEOF_CHAR
/* The size of `int', as computed by sizeof. */
#undef SIZEOF_INT
/* The size of `long', as computed by sizeof. */
#undef SIZEOF_LONG
/* The size of `long long', as computed by sizeof. */
#undef SIZEOF_LONG_LONG
/* The size of `off_t', as computed by sizeof. */
#undef SIZEOF_OFF_T
/* The size of `short', as computed by sizeof. */
#undef SIZEOF_SHORT
/* The size of `size_t', as computed by sizeof. */
#undef SIZEOF_SIZE_T
/* The size of `ssize_t', as computed by sizeof. */
#undef SIZEOF_SSIZE_T
/* The size of `void *', as computed by sizeof. */
#undef SIZEOF_VOID_P
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Whether telldir takes a const pointer */
#undef TELLDIR_TAKES_CONST_DIR
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Use infiniband */
#undef USE_INFINIBAND
/* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
#undef WORDS_BIGENDIAN
/* Define to 1 if on AIX 3.
System headers sometimes define this.
We just want to avoid a redefinition error message. */
#ifndef _ALL_SOURCE
# undef _ALL_SOURCE
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
#undef _FILE_OFFSET_BITS
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# undef _GNU_SOURCE
#endif
/* Define for large files, on AIX-style hosts. */
#undef _LARGE_FILES
/* Define to 1 if on MINIX. */
#undef _MINIX
#ifndef _OSF_SOURCE
# define _OSF_SOURCE 1
#endif
/* Define to 2 if the system does not provide POSIX.1 features except with
this defined. */
#undef _POSIX_1_SOURCE
/* Whether to enable POSIX support */
#undef _POSIX_C_SOURCE
/* Define to 1 if you need to in order for `stat' and other things to work. */
#undef _POSIX_SOURCE
/* Whether to enable System V compatibility */
#undef _SYSV
#ifndef _XOPEN_SOURCE_EXTENDED
# define _XOPEN_SOURCE_EXTENDED 1
#endif
/* Enable extensions on Solaris. */
#ifndef __EXTENSIONS__
# undef __EXTENSIONS__
#endif
#ifndef _POSIX_PTHREAD_SEMANTICS
# undef _POSIX_PTHREAD_SEMANTICS
#endif
/* Define to `int' if <sys/types.h> doesn't define. */
#undef gid_t
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to `unsigned' if <sys/types.h> does not define. */
#undef ino_t
/* Define to `short' if <sys/types.h> does not define. */
#undef int16_t
/* Define to `long' if <sys/types.h> does not define. */
#undef int32_t
/* Define to `long long' if <sys/types.h> does not define. */
#undef int64_t
/* Define to `char' if <sys/types.h> does not define. */
#undef int8_t
/* Define to `unsigned long long' if <sys/types.h> does not define. */
#undef intptr_t
/* Define to `off_t' if <sys/types.h> does not define. */
#undef loff_t
/* Define to `int' if <sys/types.h> does not define. */
#undef mode_t
/* Define to `long int' if <sys/types.h> does not define. */
#undef off_t
/* Define to `loff_t' if <sys/types.h> does not define. */
#undef offset_t
/* Define to `int' if <sys/types.h> does not define. */
#undef pid_t
/* Define to `unsigned long long' if <sys/types.h> does not define. */
#undef ptrdiff_t
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
/* Socket length type */
#undef socklen_t
/* Define to `int' if <sys/types.h> does not define. */
#undef ssize_t
/* Define to `int' if <sys/types.h> doesn't define. */
#undef uid_t
/* Define to `unsigned short' if <sys/types.h> does not define. */
#undef uint16_t
/* Define to `unsigned long' if <sys/types.h> does not define. */
#undef uint32_t
/* Define to `unsigned long long' if <sys/types.h> does not define. */
#undef uint64_t
/* Define to `unsigned char' if <sys/types.h> does not define. */
#undef uint8_t
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef uint_t

View File

@ -1,24 +0,0 @@
##################
# brlock_ctdb subsystem: built from brlock_ctdb.o, publicly depends on ctdb.
[SUBSYSTEM::brlock_ctdb]
PUBLIC_DEPENDENCIES = ctdb
brlock_ctdb_OBJ_FILES = $(ctdbsrcdir)/brlock_ctdb.o
##################
# opendb_ctdb subsystem: built from opendb_ctdb.o, publicly depends on ctdb.
[SUBSYSTEM::opendb_ctdb]
PUBLIC_DEPENDENCIES = ctdb
opendb_ctdb_OBJ_FILES = $(ctdbsrcdir)/opendb_ctdb.o
##################
# ctdb subsystem: the cluster glue object plus the ctdb client and common
# objects listed below; all paths are relative to $(ctdbsrcdir).
[SUBSYSTEM::ctdb]
PUBLIC_DEPENDENCIES = TDB_WRAP LIBTALLOC LIBEVENTS
ctdb_OBJ_FILES = $(addprefix $(ctdbsrcdir)/, \
ctdb_cluster.o \
client/ctdb_client.o \
common/ctdb_io.o \
common/ctdb_ltdb.o \
common/ctdb_message.o \
common/ctdb_util.o)

File diff suppressed because it is too large Load Diff

View File

@ -1,68 +0,0 @@
dnl Require autoconf 2.50 or newer.
AC_PREREQ(2.50)
dnl Wrapper around AC_CHECK_LIB that additionally copies the cached result
dnl into ac_cv_lib_ext_<lib>_<func>.
AC_DEFUN([AC_CHECK_LIB_EXT], [
AC_CHECK_LIB([$1],[$3],[$4],[$5],[$7])
ac_cv_lib_ext_$1_$3=$ac_cv_lib_$1_$3
])
dnl Wrapper around AC_CHECK_FUNC that additionally copies the cached result
dnl into ac_cv_func_ext_<func>.
AC_DEFUN([AC_CHECK_FUNC_EXT], [
AC_CHECK_FUNC([$1],[$3],[$4])
ac_cv_func_ext_$1=$ac_cv_func_$1
])
dnl The four SMB_* macros below are defined here as no-ops (each expands to
dnl an empty echo). Presumably they stand in for Samba build-system macros
dnl used by the included m4 fragments - TODO confirm.
AC_DEFUN([SMB_MODULE_DEFAULT], [echo -n ""])
AC_DEFUN([SMB_LIBRARY_ENABLE], [echo -n ""])
AC_DEFUN([SMB_EXT_LIB], [echo -n ""])
AC_DEFUN([SMB_ENABLE], [echo -n ""])
dnl Initialise autoconf and name a file that must exist in the source tree.
AC_INIT(ctdb.h)
AC_CONFIG_SRCDIR([server/ctdbd.c])
dnl Select the platform-specific system object, the optional scsi_io helper
dnl and any extra pcap link flags from the output of uname.
dnl Only Linux and AIX are handled; any other system aborts configure with a
dnl non-zero exit status so that callers can detect the failure.
case `uname` in
  Linux*)
    CTDB_SYSTEM_OBJ=common/system_linux.o
    CTDB_SCSI_IO=bin/scsi_io
    CTDB_PCAP_LDFLAGS=
    ;;
  AIX*)
    CTDB_SYSTEM_OBJ=common/system_aix.o
    CTDB_SCSI_IO=
    CTDB_PCAP_LDFLAGS=-lpcap
    ;;
  *)
    echo unknown system cant configure
    exit 1
    ;;
esac
dnl Macro defined outside this file; presumably runs the libreplace
dnl portability checks - TODO confirm.
AC_LIBREPLACE_ALL_CHECKS
dnl When the compiler is gcc, append extra warning flags to CFLAGS.
if test "$ac_cv_prog_gcc" = yes; then
CFLAGS="$CFLAGS -Wall -Wshadow -Wstrict-prototypes -Wpointer-arith -Wcast-qual -Wcast-align -Wwrite-strings"
fi
AC_CONFIG_HEADER(config.h)
dnl EXTRA_OBJ starts empty and is substituted into the output below.
EXTRA_OBJ=""
dnl Pull in the configure fragments for the bundled libraries and infiniband.
m4_include(libpopt.m4)
m4_include(libtalloc.m4)
m4_include(libtdb.m4)
m4_include(libevents.m4)
m4_include(ib/config.m4)
AC_CHECK_HEADERS(sched.h)
AC_CHECK_FUNCS(sched_setscheduler)
dnl Compile-test whether struct sockaddr_in has a sin_len member; the result
dnl is cached in ctdb_cv_HAVE_SOCK_SIN_LEN and drives HAVE_SOCK_SIN_LEN.
AC_CACHE_CHECK([for sin_len in sock],ctdb_cv_HAVE_SOCK_SIN_LEN,[
AC_TRY_COMPILE([#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>],
[struct sockaddr_in sock; sock.sin_len = sizeof(sock);],
ctdb_cv_HAVE_SOCK_SIN_LEN=yes,ctdb_cv_HAVE_SOCK_SIN_LEN=no)])
if test x"$ctdb_cv_HAVE_SOCK_SIN_LEN" = x"yes"; then
AC_DEFINE(HAVE_SOCK_SIN_LEN,1,[Whether the sockaddr_in struct has a sin_len property])
fi
dnl Export the variables set above for substitution and generate Makefile.
AC_SUBST(EXTRA_OBJ)
AC_SUBST(CTDB_SYSTEM_OBJ)
AC_SUBST(CTDB_SCSI_IO)
AC_SUBST(CTDB_PCAP_LDFLAGS)
AC_OUTPUT(Makefile)

View File

@ -1,250 +0,0 @@
/*
Unix SMB/CIFS implementation.
ctdb clustering hooks
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "cluster/cluster.h"
#include "system/filesys.h"
#include "cluster/cluster_private.h"
#include "../tdb/include/tdb.h"
#include "include/ctdb.h"
#include "tdb_wrap.h"
#include "../lib/util/dlinklist.h"
#include "param/param.h"
#include "librpc/gen_ndr/server_id.h"
#include "cluster/ctdb/ctdb_cluster.h"
/* a linked list of messaging handlers, allowing incoming messages
to be directed to the right messaging context.
NOTE(review): within this file the list head (cluster_state.list) is only
ever set to NULL in cluster_ctdb_init; nothing here adds entries. */
struct cluster_messaging_list {
/* neighbouring entries in the doubly linked list */
struct cluster_messaging_list *next, *prev;
/* the cluster state this entry belongs to */
struct cluster_state *state;
/* presumably the messaging context that receives the messages - confirm */
struct messaging_context *msg;
/* presumably the server id the handler is registered for - confirm */
struct server_id server;
/* callback for incoming messages */
cluster_message_fn_t handler;
};
/* private state of the ctdb cluster backend; stored in
cluster_ctdb_ops.private by cluster_ctdb_init */
struct cluster_state {
/* ctdb context returned by ctdb_init() */
struct ctdb_context *ctdb;
/* head of the messaging handler list; initialised to NULL */
struct cluster_messaging_list *list;
/* our vnn as returned by ctdb_ctrl_getvnn() for CTDB_CURRENT_NODE */
uint32_t vnn;
};
/*
return a server_id for a ctdb node
*/
static struct server_id ctdb_id(struct cluster_ops *ops, uint64_t id, uint32_t id2)
{
struct cluster_state *state = (struct cluster_state *)ops->private;
struct ctdb_context *ctdb = state->ctdb;
struct server_id server_id;
server_id.node = ctdb_get_vnn(ctdb);
server_id.id = id;
server_id.id2 = id2;
return server_id;
}
/*
  format a server_id as "node.id.id2"; the string is allocated with
  talloc_asprintf() on mem_ctx, and that call's result is returned as-is
*/
static const char *ctdb_id_string(struct cluster_ops *ops,
				  TALLOC_CTX *mem_ctx, struct server_id id)
{
	/* widen explicitly so the value matches the %llu conversion */
	unsigned long long proc_id = (unsigned long long)id.id;

	return talloc_asprintf(mem_ctx, "%u.%llu.%u", id.node, proc_id, id.id2);
}
/*
  this is an interim method for subsystems that have not yet been
  converted to use the ctdb api. It opens a shared database in the
  cluster temporary area, using TDB_CLEAR_IF_FIRST which relies on
  correct operation of fcntl locks on the shared filesystem.

  The database lives at "<ctdb:shared data>/<dbname>" and is opened
  O_RDWR|O_CREAT with mode 0600; the caller's flags are OR-ed with
  TDB_CLEAR_IF_FIRST.

  Returns the tdb_wrap handle (allocated on mem_ctx), or NULL if the
  'ctdb:shared data' parameter is not set, if the path cannot be
  allocated, or if tdb_wrap_open() fails.
*/
static struct tdb_wrap *ctdb_tdb_tmp_open(struct cluster_ops *ops,
					  TALLOC_CTX *mem_ctx,
					  struct loadparm_context *lp_ctx,
					  const char *dbname, int flags)
{
	const char *dir = lp_parm_string(lp_ctx, NULL, "ctdb", "shared data");
	char *path;
	struct tdb_wrap *w;

	if (dir == NULL) {
		DEBUG(0,("ERROR: You must set 'ctdb:shared data' to a cluster shared path\n"));
		return NULL;
	}

	path = talloc_asprintf(mem_ctx, "%s/%s", dir, dbname);
	if (path == NULL) {
		/* out of memory: never hand a NULL path to tdb_wrap_open() */
		return NULL;
	}

	w = tdb_wrap_open(mem_ctx, path, 0,
			  flags | TDB_CLEAR_IF_FIRST,
			  O_RDWR|O_CREAT, 0600);

	/* the path string is only needed for the open call itself */
	talloc_free(path);

	return w;
}
/*
  hand back the backend's ctdb context as an opaque pointer
*/
static void *ctdb_backend_handle(struct cluster_ops *ops)
{
	struct cluster_state *cstate = (struct cluster_state *)ops->private;
	void *handle = (void *)cstate->ctdb;

	return handle;
}
/* per-registration state created by ctdb_message_init and passed to
ctdb_message_handler as its private pointer */
struct ctdb_handler_state {
/* the backend state the handler was registered on */
struct cluster_state *state;
/* callback invoked for each incoming message */
cluster_message_fn_t handler;
/* messaging context passed as first argument to the callback; also the
talloc parent of this structure */
struct messaging_context *msg;
};
/*
  callback registered with ctdb: wraps the received TDB_DATA in a
  DATA_BLOB (no copy is made) and passes it to the handler stored in
  the ctdb_handler_state given as the private pointer
*/
static void ctdb_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
				 TDB_DATA data, void *private)
{
	struct ctdb_handler_state *hs;
	DATA_BLOB payload;

	hs = talloc_get_type(private, struct ctdb_handler_state);

	payload.length = data.dsize;
	payload.data   = data.dptr;

	hs->handler(hs->msg, payload);
}
/*
talloc destructor installed on each ctdb_handler_state by ctdb_message_init.
Currently a stub: it does not de-register the handler from ctdb and always
returns 0.
*/
static int ctdb_handler_destructor(struct ctdb_handler_state *s)
{
/* XXX - tell ctdb to de-register the message handler */
return 0;
}
/*
  register 'handler' for ctdb messages addressed to server.id

  A ctdb_handler_state is allocated as a talloc child of msg and handed
  to ctdb as the private pointer for ctdb_message_handler.

  Returns NT_STATUS_OK on success, or NT_STATUS_NO_MEMORY (via
  NT_STATUS_HAVE_NO_MEMORY) if the state cannot be allocated. If ctdb
  refuses the registration the error is logged and the process
  terminates with exit(1).
*/
static NTSTATUS ctdb_message_init(struct cluster_ops *ops,
				  struct messaging_context *msg,
				  struct server_id server,
				  cluster_message_fn_t handler)
{
	struct cluster_state *cstate = (struct cluster_state *)ops->private;
	struct ctdb_handler_state *hstate = talloc(msg, struct ctdb_handler_state);

	NT_STATUS_HAVE_NO_MEMORY(hstate);

	hstate->msg     = msg;
	hstate->handler = handler;
	hstate->state   = cstate;
	talloc_set_destructor(hstate, ctdb_handler_destructor);

	/* hook the dispatcher into ctdb for this server id */
	if (ctdb_set_message_handler(cstate->ctdb, server.id,
				     ctdb_message_handler, hstate) == -1) {
		DEBUG(0,("ctdb_set_message_handler failed - %s\n",
			 ctdb_errstr(cstate->ctdb)));
		exit(1);
	}

	return NT_STATUS_OK;
}
/*
  send the blob to (server.node, server.id) through ctdb

  The blob is passed by reference, not copied. Any non-zero result from
  ctdb_send_message() is reported as NT_STATUS_INTERNAL_DB_CORRUPTION.
*/
static NTSTATUS ctdb_message_send(struct cluster_ops *ops,
				  struct server_id server, DATA_BLOB *data)
{
	struct cluster_state *cstate = (struct cluster_state *)ops->private;
	TDB_DATA payload;

	payload.dsize = data->length;
	payload.dptr  = data->data;

	if (ctdb_send_message(cstate->ctdb, server.node, server.id, payload) == 0) {
		return NT_STATUS_OK;
	}

	return NT_STATUS_INTERNAL_DB_CORRUPTION;
}
/* operations table for the ctdb backend; installed with cluster_set_ops()
by cluster_ctdb_init, which also fills in .private with the cluster_state */
static struct cluster_ops cluster_ctdb_ops = {
.cluster_id = ctdb_id,
.cluster_id_string = ctdb_id_string,
.cluster_tdb_tmp_open = ctdb_tdb_tmp_open,
.backend_handle = ctdb_backend_handle,
.message_init = ctdb_message_init,
.message_send = ctdb_message_send,
/* set at runtime in cluster_ctdb_init */
.private = NULL
};
/*
  initialise the ctdb cluster backend

  Does nothing unless the 'ctdb:enable' parameter is true. Otherwise it
  creates a ctdb context on the event context, connects to the ctdb
  socket, fetches our vnn and installs cluster_ctdb_ops as the active
  cluster operations.

  lp_ctx - configuration to read 'ctdb:enable' from
  ev     - event context; talloc parent of the backend state
  model  - not used by this function

  There is no return value: on any failure a message is logged, the
  partially built state (including the ctdb context) is released and
  the cluster operations are left untouched.
*/
void cluster_ctdb_init(struct loadparm_context *lp_ctx,
		       struct event_context *ev, const char *model)
{
	struct cluster_state *state;
	int ret;

	if (!lp_parm_bool(lp_ctx, NULL, "ctdb", "enable", false)) {
		return;
	}

	state = talloc(ev, struct cluster_state);
	if (state == NULL) goto failed;

	state->ctdb = ctdb_init(ev);
	if (state->ctdb == NULL) goto failed;

	ret = ctdb_socket_connect(state->ctdb);
	if (ret == -1) {
		DEBUG(0,(__location__ " Failed to connect to ctdb socket\n"));
		goto failed;
	}

	/* get our vnn */
	state->vnn = ctdb_ctrl_getvnn(state->ctdb, timeval_zero(), CTDB_CURRENT_NODE);
	if (state->vnn == (uint32_t)-1) {
		DEBUG(0,(__location__ " Failed to get ctdb vnn\n"));
		goto failed;
	}

	state->list = NULL;

	cluster_ctdb_ops.private = state;

	cluster_set_ops(&cluster_ctdb_ops);

#if 0
	/* nasty hack for now ... */
	{
		void brl_ctdb_init_ops(void);
		brl_ctdb_init_ops();
	}
#endif

	return;

failed:
	DEBUG(0,("cluster_ctdb_init failed\n"));
	if (state != NULL) {
		/*
		 * the ctdb context was created from ev, not from state, so
		 * freeing state alone would leave it allocated. state->ctdb
		 * is always assigned (possibly NULL) before any jump here
		 * with a non-NULL state.
		 */
		talloc_free(state->ctdb);
	}
	talloc_free(state);
}

View File

@ -1,23 +0,0 @@
/*
Unix SMB/CIFS implementation.
ctdb clustering hooks - header
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* initialise the ctdb cluster backend; a no-op unless 'ctdb:enable' is set */
void cluster_ctdb_init(struct loadparm_context *lp_ctx,
struct event_context *ev, const char *model);

View File

@ -1,403 +0,0 @@
.\" Title: ctdb
.\" Author:
.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
.\" Date: 07/10/2007
.\" Manual:
.\" Source:
.\"
.TH "CTDB" "1" "07/10/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.SH "NAME"
ctdb \- clustered tdb database management utility
.SH "SYNOPSIS"
.HP 29
\fBctdb [ OPTIONS ] COMMAND ...\fR
.HP 5
\fBctdb\fR [\-n\ <node>] [\-Y] [\-t\ <timeout>] [\-?\ \-\-help] [\-\-usage] [\-d\ \-\-debug=<INTEGER>] [\-\-socket=<filename>]
.SH "DESCRIPTION"
.PP
ctdb is a utility to view and manage a ctdb cluster.
.SH "OPTIONS"
.PP
\-n <node>
.RS 4
This specifies on which node to execute the command. Default is to run the command on the daemon running on the local host.
.RE
.PP
\-Y
.RS 4
Produce output in machine\-readable form for easier parsing by scripts. Not all commands support this option.
.RE
.PP
\-t <timeout>
.RS 4
How long should ctdb wait for a command to complete before timing out. Default is 3 seconds.
.RE
.PP
\-? \-\-help
.RS 4
Print some help text to the screen.
.RE
.PP
\-\-usage
.RS 4
Print usage information to the screen.
.RE
.PP
\-d \-\-debug=<debuglevel>
.RS 4
Change the debug level for the command. Default is 0.
.RE
.PP
\-\-socket=<filename>
.RS 4
Specify the socketname to use when connecting to the local ctdb daemon. The default is /tmp/ctdb.socket .
.sp
You only need to specify this parameter if you run multiple ctdb daemons on the same physical host and thus can not use the default name for the domain socket.
.RE
.SH "ADMINISTRATIVE COMMANDS"
.PP
These are commands used to monitor and administrate a CTDB cluster.
.SS "status"
.PP
This command shows the current status of the ctdb node.
.sp
.it 1 an-trap
.nr an-no-space-flag 1
.nr an-break-flag 1
.br
\fBnode status\fR
.RS
.PP
Node status reflects the current status of the node. There are five possible states:
.PP
OK \- This node is fully functional.
.PP
DISCONNECTED \- This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
.PP
DISABLED \- This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
.PP
UNHEALTHY \- A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currently being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
.PP
BANNED \- This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not participate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
.RE
.sp
.it 1 an-trap
.nr an-no-space-flag 1
.nr an-break-flag 1
.br
\fBgeneration\fR
.RS
.PP
The generation id is a number that indicates the current generation of a cluster instance. Each time a cluster goes through a reconfiguration or a recovery its generation id will be changed.
.RE
.sp
.it 1 an-trap
.nr an-no-space-flag 1
.nr an-break-flag 1
.br
\fBVNNMAP\fR
.RS
.PP
The list of Virtual Node Numbers. This is a list of all nodes that actively participate in the cluster and that share the workload of hosting the Clustered TDB database records. Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
.RE
.sp
.it 1 an-trap
.nr an-no-space-flag 1
.nr an-break-flag 1
.br
\fBRecovery mode\fR
.RS
.PP
This is the current recovery mode of the cluster. There are two possible modes:
.PP
NORMAL \- The cluster is fully operational.
.PP
RECOVERY \- The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
.RE
.sp
.it 1 an-trap
.nr an-no-space-flag 1
.nr an-break-flag 1
.br
\fBRecovery master\fR
.RS
.PP
This is the cluster node that is currently designated as the recovery master. This node is responsible for monitoring the consistency of the cluster and for performing the actual recovery process when required.
.RE
.PP
Example: ctdb status
.PP
Example output:
.sp
.RS 4
.nf
Number of nodes:4
vnn:0 11.1.2.200 OK (THIS NODE)
vnn:1 11.1.2.201 OK
vnn:2 11.1.2.202 OK
vnn:3 11.1.2.203 OK
Generation:1362079228
Size:4
hash:0 lmaster:0
hash:1 lmaster:1
hash:2 lmaster:2
hash:3 lmaster:3
Recovery mode:NORMAL (0)
Recovery master:0
.fi
.RE
.SS "ping"
.PP
This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
.PP
Example: ctdb ping
.PP
Example output:
.sp
.RS 4
.nf
response from 0 time=0.000054 sec (3 clients)
response from 1 time=0.000144 sec (2 clients)
response from 2 time=0.000105 sec (2 clients)
response from 3 time=0.000114 sec (2 clients)
.fi
.RE
.SS "ip"
.PP
This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip.
.PP
Example: ctdb ip
.PP
Example output:
.sp
.RS 4
.nf
Number of nodes:4
12.1.1.1 0
12.1.1.2 1
12.1.1.3 2
12.1.1.4 3
.fi
.RE
.SS "getvar <name>"
.PP
Get the runtime value of a tuneable variable.
.PP
Example: ctdb getvar MaxRedirectCount
.PP
Example output:
.sp
.RS 4
.nf
MaxRedirectCount = 3
.fi
.RE
.SS "setvar <name> <value>"
.PP
Set the runtime value of a tuneable variable.
.PP
Example: ctdb setvar MaxRedirectCount 5
.SS "listvars"
.PP
List all tuneable variables.
.PP
Example: ctdb listvars
.PP
Example output:
.sp
.RS 4
.nf
MaxRedirectCount = 5
SeqnumFrequency = 1
ControlTimeout = 60
TraverseTimeout = 20
KeepaliveInterval = 2
KeepaliveLimit = 3
MaxLACount = 7
RecoverTimeout = 5
RecoverInterval = 1
ElectionTimeout = 3
TakeoverTimeout = 5
MonitorInterval = 15
EventScriptTimeout = 20
RecoveryGracePeriod = 60
RecoveryBanPeriod = 300
.fi
.RE
.SS "statistics"
.PP
Collect statistics from the CTDB daemon about how many calls it has served.
.PP
Example: ctdb statistics
.PP
Example output:
.sp
.RS 4
.nf
CTDB version 1
num_clients 3
frozen 0
recovering 0
client_packets_sent 360489
client_packets_recv 360466
node_packets_sent 480931
node_packets_recv 240120
keepalive_packets_sent 4
keepalive_packets_recv 3
node
req_call 2
reply_call 2
req_dmaster 0
reply_dmaster 0
reply_error 0
req_message 42
req_control 120408
reply_control 360439
client
req_call 2
req_message 24
req_control 360440
timeouts
call 0
control 0
traverse 0
total_calls 2
pending_calls 0
lockwait_calls 0
pending_lockwait_calls 0
memory_used 5040
max_hop_count 0
max_call_latency 4.948321 sec
max_lockwait_latency 0.000000 sec
.fi
.RE
.SS "statisticsreset"
.PP
This command is used to clear all statistics counters in a node.
.PP
Example: ctdb statisticsreset
.SS "getdebug"
.PP
Get the current debug level for the node. The debug level controls what information is written to the log file.
.SS "setdebug <debuglevel>"
.PP
Set the debug level of a node. This is a number between 0 and 9 and controls what information will be written to the logfile.
.SS "getpid"
.PP
This command will return the process id of the ctdb daemon.
.SS "disable"
.PP
This command is used to administratively disable a node in the cluster. A disabled node will still participate in the cluster and host clustered TDB records but its public ip address has been taken over by a different node and it no longer hosts any services.
.SS "enable"
.PP
Re\-enable a node that has been administratively disabled.
.SS "ban <bantime|0>"
.PP
Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned.
.PP
A banned node does not participate in the cluster and does not host any records for the clustered TDB. Its ip address has been taken over by another node and no services are hosted.
.PP
Nodes are automatically banned if they are the cause of too many cluster recoveries.
.SS "unban"
.PP
This command is used to unban a node that has either been administratively banned using the ban command or has been automatically banned by the recovery daemon.
.SS "shutdown"
.PP
This command will shutdown a specific CTDB daemon.
.SS "recover"
.PP
This command will trigger the recovery daemon to do a cluster recovery.
.SS "killtcp <srcip:port> <dstip:port>"
.PP
This command will kill the specified TCP connection by issuing a TCP RST to the srcip:port endpoint.
.SS "tickle <srcip:port> <dstip:port>"
.PP
This command will send a TCP tickle to the source host for the specified TCP connection. A TCP tickle is a TCP ACK packet with an invalid sequence and acknowledge number and will, when received by the source host, result in it sending an immediate correct ACK back to the other end.
.PP
TCP tickles are useful to "tickle" clients after an IP failover has occurred since this will make the client immediately recognize the TCP connection has been disrupted and that the client will need to reestablish. This greatly speeds up the time it takes for a client to detect and reestablish after an IP failover in the ctdb cluster.
.SH "DEBUGGING COMMANDS"
.PP
These commands are primarily used for CTDB development and testing and should not be used for normal administration.
.SS "process\-exists <pid>"
.PP
This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
.SS "getdbmap"
.PP
This command lists all clustered TDB databases that the CTDB daemon has attached to.
.PP
Example: ctdb getdbmap
.PP
Example output:
.sp
.RS 4
.nf
Number of databases:4
dbid:0x42fe72c5 name:locking.tdb path:/var/ctdb/locking.tdb.0
dbid:0x1421fb78 name:brlock.tdb path:/var/ctdb/brlock.tdb.0
dbid:0x17055d90 name:connections.tdb path:/var/ctdb/connections.tdb.0
dbid:0xc0bdde6a name:sessionid.tdb path:/var/ctdb/sessionid.tdb.0
.fi
.RE
.SS "catdb <dbname>"
.PP
This command will dump a clustered TDB database to the screen. This is a debugging command.
.SS "getmonmode"
.PP
This command returns the monitoring mode of a node. The monitoring mode is either ACTIVE or DISABLED. Normally a node will continuously monitor that all other nodes that are expected are in fact connected and that they respond to commands.
.PP
ACTIVE \- This is the normal mode. The node is actively monitoring all other nodes, both that the transport is connected and also that the node responds to commands. If a node becomes unavailable, it will be marked as DISCONNECTED and a recovery is initiated to restore the cluster.
.PP
DISABLED \- This node is not monitoring that other nodes are available. In this mode a node failure will not be detected and no recovery will be performed. This mode is useful when for debugging purposes one wants to attach GDB to a ctdb process but wants to prevent the rest of the cluster from marking this node as DISCONNECTED and do a recovery.
.SS "setmonmode <0|1>"
.PP
This command can be used to explicitly disable/enable monitoring mode on a node. The main purpose is if one wants to attach GDB to a running ctdb daemon but wants to prevent the other nodes from marking it as DISCONNECTED and issuing a recovery. To do this, set monitoring mode to 0 on all nodes before attaching with GDB. Remember to set monitoring mode back to 1 afterwards.
.SS "attach <dbname>"
.PP
This is a debugging command. This command will make the CTDB daemon create a new CTDB database and attach to it.
.SS "dumpmemory"
.PP
This is a debugging command. This command will make the ctdb daemon write a full memory allocation map to the log file.
.SS "freeze"
.PP
This command will lock all the local TDB databases causing clients that are accessing these TDBs such as samba3 to block until the databases are thawed.
.PP
This is primarily used by the recovery daemon to stop all samba daemons from accessing any databases while the database is recovered and rebuilt.
.SS "thaw"
.PP
Thaw a previously frozen node.
.SH "SEE ALSO"
.PP
ctdbd(1), onnode(1)
\fI\%http://ctdb.samba.org/\fR
.SH "COPYRIGHT/LICENSE"
.sp
.RS 4
.nf
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
.fi
.RE

View File

@ -1,277 +0,0 @@
<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdb</title><meta name="generator" content="DocBook XSL Stylesheets V1.72.0"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" lang="en"><a name="ctdb.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdb &#8212; clustered tdb database management utility</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">ctdb [ OPTIONS ] COMMAND ...</code> </p></div><div class="cmdsynopsis"><p><code class="command">ctdb</code> [-n &lt;node&gt;] [-Y] [-t &lt;timeout&gt;] [-? --help] [--usage] [-d --debug=&lt;INTEGER&gt;] [--socket=&lt;filename&gt;]</p></div></div><div class="refsect1" lang="en"><a name="id2488867"></a><h2>DESCRIPTION</h2><p>
ctdb is a utility to view and manage a ctdb cluster.
</p></div><div class="refsect1" lang="en"><a name="id2488877"></a><h2>OPTIONS</h2><div class="variablelist"><dl><dt><span class="term">-n &lt;node&gt;</span></dt><dd><p>
This specifies on which node to execute the command. Default is
to run the command on the daemon running on the local host.
</p></dd><dt><span class="term">-Y</span></dt><dd><p>
Produce output in machine-readable form for easier parsing by scripts. Not all commands support this option.
</p></dd><dt><span class="term">-t &lt;timeout&gt;</span></dt><dd><p>
How long should ctdb wait for a command to complete before timing out. Default is 3 seconds.
</p></dd><dt><span class="term">-? --help</span></dt><dd><p>
Print some help text to the screen.
</p></dd><dt><span class="term">--usage</span></dt><dd><p>
Print usage information to the screen.
</p></dd><dt><span class="term">-d --debug=&lt;debuglevel&gt;</span></dt><dd><p>
Change the debug level for the command. Default is 0.
</p></dd><dt><span class="term">--socket=&lt;filename&gt;</span></dt><dd><p>
Specify the socketname to use when connecting to the local ctdb
daemon. The default is /tmp/ctdb.socket .
</p><p>
You only need to specify this parameter if you run multiple ctdb
daemons on the same physical host and thus can not use the default
name for the domain socket.
</p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2488991"></a><h2>Administrative Commands</h2><p>
These are commands used to monitor and administrate a CTDB cluster.
</p><div class="refsect2" lang="en"><a name="id2489000"></a><h3>status</h3><p>
This command shows the current status of the ctdb node.
</p><div class="refsect3" lang="en"><a name="id2489009"></a><h4>node status</h4><p>
Node status reflects the current status of the node. There are five possible states:
</p><p>
OK - This node is fully functional.
</p><p>
DISCONNECTED - This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
</p><p>
DISABLED - This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
</p><p>
UNHEALTHY - A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currently being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
</p><p>
BANNED - This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not participate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
</p></div><div class="refsect3" lang="en"><a name="id2489061"></a><h4>generation</h4><p>
The generation id is a number that indicates the current generation
of a cluster instance. Each time a cluster goes through a
reconfiguration or a recovery its generation id will be changed.
</p></div><div class="refsect3" lang="en"><a name="id2490207"></a><h4>VNNMAP</h4><p>
The list of Virtual Node Numbers. This is a list of all nodes that actively participate in the cluster and that share the workload of hosting the Clustered TDB database records.
Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
</p></div><div class="refsect3" lang="en"><a name="id2490221"></a><h4>Recovery mode</h4><p>
This is the current recovery mode of the cluster. There are two possible modes:
</p><p>
NORMAL - The cluster is fully operational.
</p><p>
RECOVERY - The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
</p></div><div class="refsect3" lang="en"><a name="id2490244"></a><h4>Recovery master</h4><p>
This is the cluster node that is currently designated as the recovery master. This node is responsible for monitoring the consistency of the cluster and for performing the actual recovery process when required.
</p></div><p>
Example: ctdb status
</p><p>Example output:</p><pre class="screen">
Number of nodes:4
vnn:0 11.1.2.200 OK (THIS NODE)
vnn:1 11.1.2.201 OK
vnn:2 11.1.2.202 OK
vnn:3 11.1.2.203 OK
Generation:1362079228
Size:4
hash:0 lmaster:0
hash:1 lmaster:1
hash:2 lmaster:2
hash:3 lmaster:3
Recovery mode:NORMAL (0)
Recovery master:0
</pre></div><div class="refsect2" lang="en"><a name="id2490275"></a><h3>ping</h3><p>
This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
</p><p>
Example: ctdb ping
</p><p>
Example output:
</p><pre class="screen">
response from 0 time=0.000054 sec (3 clients)
response from 1 time=0.000144 sec (2 clients)
response from 2 time=0.000105 sec (2 clients)
response from 3 time=0.000114 sec (2 clients)
</pre></div><div class="refsect2" lang="en"><a name="id2490302"></a><h3>ip</h3><p>
This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip.
</p><p>
Example: ctdb ip
</p><p>
Example output:
</p><pre class="screen">
Number of nodes:4
12.1.1.1 0
12.1.1.2 1
12.1.1.3 2
12.1.1.4 3
</pre></div><div class="refsect2" lang="en"><a name="id2490327"></a><h3>getvar &lt;name&gt;</h3><p>
Get the runtime value of a tuneable variable.
</p><p>
Example: ctdb getvar MaxRedirectCount
</p><p>
Example output:
</p><pre class="screen">
MaxRedirectCount = 3
</pre></div><div class="refsect2" lang="en"><a name="id2490350"></a><h3>setvar &lt;name&gt; &lt;value&gt;</h3><p>
Set the runtime value of a tuneable variable.
</p><p>
Example: ctdb setvar MaxRedirectCount 5
</p></div><div class="refsect2" lang="en"><a name="id2490365"></a><h3>listvars</h3><p>
List all tuneable variables.
</p><p>
Example: ctdb listvars
</p><p>
Example output:
</p><pre class="screen">
MaxRedirectCount = 5
SeqnumFrequency = 1
ControlTimeout = 60
TraverseTimeout = 20
KeepaliveInterval = 2
KeepaliveLimit = 3
MaxLACount = 7
RecoverTimeout = 5
RecoverInterval = 1
ElectionTimeout = 3
TakeoverTimeout = 5
MonitorInterval = 15
EventScriptTimeout = 20
RecoveryGracePeriod = 60
RecoveryBanPeriod = 300
</pre></div><div class="refsect2" lang="en"><a name="id2490393"></a><h3>statistics</h3><p>
Collect statistics from the CTDB daemon about how many calls it has served.
</p><p>
Example: ctdb statistics
</p><p>
Example output:
</p><pre class="screen">
CTDB version 1
num_clients 3
frozen 0
recovering 0
client_packets_sent 360489
client_packets_recv 360466
node_packets_sent 480931
node_packets_recv 240120
keepalive_packets_sent 4
keepalive_packets_recv 3
node
req_call 2
reply_call 2
req_dmaster 0
reply_dmaster 0
reply_error 0
req_message 42
req_control 120408
reply_control 360439
client
req_call 2
req_message 24
req_control 360440
timeouts
call 0
control 0
traverse 0
total_calls 2
pending_calls 0
lockwait_calls 0
pending_lockwait_calls 0
memory_used 5040
max_hop_count 0
max_call_latency 4.948321 sec
max_lockwait_latency 0.000000 sec
</pre></div><div class="refsect2" lang="en"><a name="id2490436"></a><h3>statisticsreset</h3><p>
This command is used to clear all statistics counters in a node.
</p><p>
Example: ctdb statisticsreset
</p></div><div class="refsect2" lang="en"><a name="id2490450"></a><h3>getdebug</h3><p>
Get the current debug level for the node. The debug level controls what information is written to the log file.
</p></div><div class="refsect2" lang="en"><a name="id2490461"></a><h3>setdebug &lt;debuglevel&gt;</h3><p>
Set the debug level of a node. This is a number between 0 and 9 and controls what information will be written to the logfile.
</p></div><div class="refsect2" lang="en"><a name="id2536585"></a><h3>getpid</h3><p>
This command will return the process id of the ctdb daemon.
</p></div><div class="refsect2" lang="en"><a name="id2536595"></a><h3>disable</h3><p>
This command is used to administratively disable a node in the cluster.
A disabled node will still participate in the cluster and host
clustered TDB records but its public ip address has been taken over by
a different node and it no longer hosts any services.
</p></div><div class="refsect2" lang="en"><a name="id2536613"></a><h3>enable</h3><p>
Re-enable a node that has been administratively disabled.
</p></div><div class="refsect2" lang="en"><a name="id2536623"></a><h3>ban &lt;bantime|0&gt;</h3><p>
Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned.
</p><p>
A banned node does not participate in the cluster and does not host any records for the clustered TDB. Its ip address has been taken over by another node and no services are hosted.
</p><p>
Nodes are automatically banned if they are the cause of too many
cluster recoveries.
</p></div><div class="refsect2" lang="en"><a name="id2536646"></a><h3>unban</h3><p>
This command is used to unban a node that has either been
administratively banned using the ban command or has been automatically
banned by the recovery daemon.
</p></div><div class="refsect2" lang="en"><a name="id2536658"></a><h3>shutdown</h3><p>
This command will shutdown a specific CTDB daemon.
</p></div><div class="refsect2" lang="en"><a name="id2536668"></a><h3>recover</h3><p>
This command will trigger the recovery daemon to do a cluster
recovery.
</p></div><div class="refsect2" lang="en"><a name="id2536679"></a><h3>killtcp &lt;srcip:port&gt; &lt;dstip:port&gt;</h3><p>
This command will kill the specified TCP connection by issuing a
TCP RST to the srcip:port endpoint.
</p></div><div class="refsect2" lang="en"><a name="id2536690"></a><h3>tickle &lt;srcip:port&gt; &lt;dstip:port&gt;</h3><p>
This command will send a TCP tickle to the source host for the
specified TCP connection.
A TCP tickle is a TCP ACK packet with an invalid sequence and
acknowledge number and will when received by the source host result
in it sending an immediate correct ACK back to the other end.
</p><p>
TCP tickles are useful to "tickle" clients after an IP failover has
occurred since this will make the client immediately recognize the
TCP connection has been disrupted and that the client will need
to reestablish. This greatly speeds up the time it takes for a client
to detect and reestablish after an IP failover in the ctdb cluster.
</p></div></div><div class="refsect1" lang="en"><a name="id2536716"></a><h2>Debugging Commands</h2><p>
These commands are primarily used for CTDB development and testing and
should not be used for normal administration.
</p><div class="refsect2" lang="en"><a name="id2536726"></a><h3>process-exists &lt;pid&gt;</h3><p>
This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
</p></div><div class="refsect2" lang="en"><a name="id2536738"></a><h3>getdbmap</h3><p>
This command lists all clustered TDB databases that the CTDB daemon has attached to.
</p><p>
Example: ctdb getdbmap
</p><p>
Example output:
</p><pre class="screen">
Number of databases:4
dbid:0x42fe72c5 name:locking.tdb path:/var/ctdb/locking.tdb.0
dbid:0x1421fb78 name:brlock.tdb path:/var/ctdb/brlock.tdb.0
dbid:0x17055d90 name:connections.tdb path:/var/ctdb/connections.tdb.0
dbid:0xc0bdde6a name:sessionid.tdb path:/var/ctdb/sessionid.tdb.0
</pre></div><div class="refsect2" lang="en"><a name="id2536766"></a><h3>catdb &lt;dbname&gt;</h3><p>
This command will dump a clustered TDB database to the screen. This is a debugging command.
</p></div><div class="refsect2" lang="en"><a name="id2536777"></a><h3>getmonmode</h3><p>
This command returns the monitoring mode of a node. The monitoring mode is either ACTIVE or DISABLED. Normally a node will continuously monitor that all other nodes that are expected are in fact connected and that they respond to commands.
</p><p>
ACTIVE - This is the normal mode. The node is actively monitoring all other nodes, both that the transport is connected and also that the node responds to commands. If a node becomes unavailable, it will be marked as DISCONNECTED and a recovery is initiated to restore the cluster.
</p><p>
DISABLED - This node is not monitoring that other nodes are available. In this mode a node failure will not be detected and no recovery will be performed. This mode is useful when for debugging purposes one wants to attach GDB to a ctdb process but wants to prevent the rest of the cluster from marking this node as DISCONNECTED and do a recovery.
</p></div><div class="refsect2" lang="en"><a name="id2536808"></a><h3>setmonmode &lt;0|1&gt;</h3><p>
This command can be used to explicitly disable/enable monitoring mode on a node. The main purpose is if one wants to attach GDB to a running ctdb daemon but wants to prevent the other nodes from marking it as DISCONNECTED and issuing a recovery. To do this, set monitoring mode to 0 on all nodes before attaching with GDB. Remember to set monitoring mode back to 1 afterwards.
</p></div><div class="refsect2" lang="en"><a name="id2536823"></a><h3>attach &lt;dbname&gt;</h3><p>
This is a debugging command. This command will make the CTDB daemon create a new CTDB database and attach to it.
</p></div><div class="refsect2" lang="en"><a name="id2536835"></a><h3>dumpmemory</h3><p>
This is a debugging command. This command will make the ctdb daemon write a full memory allocation map to the log file.
</p></div><div class="refsect2" lang="en"><a name="id2536846"></a><h3>freeze</h3><p>
This command will lock all the local TDB databases causing clients
that are accessing these TDBs such as samba3 to block until the
databases are thawed.
</p><p>
This is primarily used by the recovery daemon to stop all samba
daemons from accessing any databases while the database is recovered
and rebuilt.
</p></div><div class="refsect2" lang="en"><a name="id2536864"></a><h3>thaw</h3><p>
Thaw a previously frozen node.
</p></div></div><div class="refsect1" lang="en"><a name="id2536875"></a><h2>SEE ALSO</h2><p>
ctdbd(1), onnode(1)
<a href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
</p></div><div class="refsect1" lang="en"><a name="id2536888"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
Copyright (C) Andrew Tridgell 2007<br>
Copyright (C) Ronnie sahlberg 2007<br>
<br>
This program is free software; you can redistribute it and/or modify<br>
it under the terms of the GNU General Public License as published by<br>
the Free Software Foundation; either version 3 of the License, or (at<br>
your option) any later version.<br>
<br>
This program is distributed in the hope that it will be useful, but<br>
WITHOUT ANY WARRANTY; without even the implied warranty of<br>
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU<br>
General Public License for more details.<br>
<br>
You should have received a copy of the GNU General Public License<br>
along with this program; if not, see http://www.gnu.org/licenses/.<br>
</p></div></div></div></body></html>

View File

@ -1,553 +0,0 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
<refentry id="ctdb.1">
<refmeta>
<refentrytitle>ctdb</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>
<refnamediv>
<refname>ctdb</refname>
<refpurpose>clustered tdb database management utility</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>ctdb [ OPTIONS ] COMMAND ...</command>
</cmdsynopsis>
<cmdsynopsis>
<command>ctdb</command>
<arg choice="opt">-n &lt;node&gt;</arg>
<arg choice="opt">-Y</arg>
<arg choice="opt">-t &lt;timeout&gt;</arg>
<arg choice="opt">-? --help</arg>
<arg choice="opt">--usage</arg>
<arg choice="opt">-d --debug=&lt;INTEGER&gt;</arg>
<arg choice="opt">--socket=&lt;filename&gt;</arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1><title>DESCRIPTION</title>
<para>
ctdb is a utility to view and manage a ctdb cluster.
</para>
</refsect1>
<refsect1>
<title>OPTIONS</title>
<variablelist>
<varlistentry><term>-n &lt;node&gt;</term>
<listitem>
<para>
This specifies on which node to execute the command. Default is
to run the command on the daemon running on the local host.
</para>
</listitem>
</varlistentry>
<varlistentry><term>-Y</term>
<listitem>
<para>
Produce output in machine-readable form for easier parsing by scripts. Not all commands support this option.
</para>
</listitem>
</varlistentry>
<varlistentry><term>-t &lt;timeout&gt;</term>
<listitem>
<para>
How long should ctdb wait for a command to complete before timing out. Default is 3 seconds.
</para>
</listitem>
</varlistentry>
<varlistentry><term>-? --help</term>
<listitem>
<para>
Print some help text to the screen.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--usage</term>
<listitem>
<para>
Print usage information to the screen.
</para>
</listitem>
</varlistentry>
<varlistentry><term>-d --debug=&lt;debuglevel&gt;</term>
<listitem>
<para>
Change the debug level for the command. Default is 0.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--socket=&lt;filename&gt;</term>
<listitem>
<para>
Specify the socketname to use when connecting to the local ctdb
daemon. The default is /tmp/ctdb.socket .
</para>
<para>
You only need to specify this parameter if you run multiple ctdb
daemons on the same physical host and thus can not use the default
name for the domain socket.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1><title>Administrative Commands</title>
<para>
These are commands used to monitor and administrate a CTDB cluster.
</para>
<refsect2><title>status</title>
<para>
This command shows the current status of the ctdb node.
</para>
<refsect3><title>node status</title>
<para>
Node status reflects the current status of the node. There are five possible states:
</para>
<para>
OK - This node is fully functional.
</para>
<para>
DISCONNECTED - This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
</para>
<para>
DISABLED - This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
</para>
<para>
UNHEALTHY - A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currently being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
</para>
<para>
BANNED - This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not participate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
</para>
</refsect3>
<refsect3><title>generation</title>
<para>
The generation id is a number that indicates the current generation
of a cluster instance. Each time a cluster goes through a
reconfiguration or a recovery its generation id will be changed.
</para>
</refsect3>
<refsect3><title>VNNMAP</title>
<para>
The list of Virtual Node Numbers. This is a list of all nodes that actively participate in the cluster and that share the workload of hosting the Clustered TDB database records.
Only nodes that are participating in the vnnmap can become lmaster or dmaster for a database record.
</para>
</refsect3>
<refsect3><title>Recovery mode</title>
<para>
This is the current recovery mode of the cluster. There are two possible modes:
</para>
<para>
NORMAL - The cluster is fully operational.
</para>
<para>
RECOVERY - The cluster databases have all been frozen, pausing all services while the cluster awaits a recovery process to complete. A recovery process should finish within seconds. If a cluster is stuck in the RECOVERY state this would indicate a cluster malfunction which needs to be investigated.
</para>
</refsect3>
<refsect3><title>Recovery master</title>
<para>
This is the cluster node that is currently designated as the recovery master. This node is responsible for monitoring the consistency of the cluster and for performing the actual recovery process when required.
</para>
</refsect3>
<para>
Example: ctdb status
</para>
<para>Example output:</para>
<screen format="linespecific">
Number of nodes:4
vnn:0 11.1.2.200 OK (THIS NODE)
vnn:1 11.1.2.201 OK
vnn:2 11.1.2.202 OK
vnn:3 11.1.2.203 OK
Generation:1362079228
Size:4
hash:0 lmaster:0
hash:1 lmaster:1
hash:2 lmaster:2
hash:3 lmaster:3
Recovery mode:NORMAL (0)
Recovery master:0
</screen>
</refsect2>
<refsect2><title>ping</title>
<para>
This command will "ping" all CTDB daemons in the cluster to verify that they are processing commands correctly.
</para>
<para>
Example: ctdb ping
</para>
<para>
Example output:
</para>
<screen format="linespecific">
response from 0 time=0.000054 sec (3 clients)
response from 1 time=0.000144 sec (2 clients)
response from 2 time=0.000105 sec (2 clients)
response from 3 time=0.000114 sec (2 clients)
</screen>
</refsect2>
<refsect2><title>ip</title>
<para>
This command will display the list of public addresses that are provided by the cluster and which physical node is currently serving this ip.
</para>
<para>
Example: ctdb ip
</para>
<para>
Example output:
</para>
<screen format="linespecific">
Number of nodes:4
12.1.1.1 0
12.1.1.2 1
12.1.1.3 2
12.1.1.4 3
</screen>
</refsect2>
<refsect2><title>getvar &lt;name&gt;</title>
<para>
Get the runtime value of a tuneable variable.
</para>
<para>
Example: ctdb getvar MaxRedirectCount
</para>
<para>
Example output:
</para>
<screen format="linespecific">
MaxRedirectCount = 3
</screen>
</refsect2>
<refsect2><title>setvar &lt;name&gt; &lt;value&gt;</title>
<para>
Set the runtime value of a tuneable variable.
</para>
<para>
Example: ctdb setvar MaxRedirectCount 5
</para>
</refsect2>
<refsect2><title>listvars</title>
<para>
List all tuneable variables.
</para>
<para>
Example: ctdb listvars
</para>
<para>
Example output:
</para>
<screen format="linespecific">
MaxRedirectCount = 5
SeqnumFrequency = 1
ControlTimeout = 60
TraverseTimeout = 20
KeepaliveInterval = 2
KeepaliveLimit = 3
MaxLACount = 7
RecoverTimeout = 5
RecoverInterval = 1
ElectionTimeout = 3
TakeoverTimeout = 5
MonitorInterval = 15
EventScriptTimeout = 20
RecoveryGracePeriod = 60
RecoveryBanPeriod = 300
</screen>
</refsect2>
<refsect2><title>statistics</title>
<para>
Collect statistics from the CTDB daemon about how many calls it has served.
</para>
<para>
Example: ctdb statistics
</para>
<para>
Example output:
</para>
<screen format="linespecific">
CTDB version 1
num_clients 3
frozen 0
recovering 0
client_packets_sent 360489
client_packets_recv 360466
node_packets_sent 480931
node_packets_recv 240120
keepalive_packets_sent 4
keepalive_packets_recv 3
node
req_call 2
reply_call 2
req_dmaster 0
reply_dmaster 0
reply_error 0
req_message 42
req_control 120408
reply_control 360439
client
req_call 2
req_message 24
req_control 360440
timeouts
call 0
control 0
traverse 0
total_calls 2
pending_calls 0
lockwait_calls 0
pending_lockwait_calls 0
memory_used 5040
max_hop_count 0
max_call_latency 4.948321 sec
max_lockwait_latency 0.000000 sec
</screen>
</refsect2>
<refsect2><title>statisticsreset</title>
<para>
This command is used to clear all statistics counters in a node.
</para>
<para>
Example: ctdb statisticsreset
</para>
</refsect2>
<refsect2><title>getdebug</title>
<para>
Get the current debug level for the node. The debug level controls what information is written to the log file.
</para>
</refsect2>
<refsect2><title>setdebug &lt;debuglevel&gt;</title>
<para>
Set the debug level of a node. This is a number between 0 and 9 and controls what information will be written to the logfile.
</para>
</refsect2>
<refsect2><title>getpid</title>
<para>
This command will return the process id of the ctdb daemon.
</para>
</refsect2>
<refsect2><title>disable</title>
<para>
This command is used to administratively disable a node in the cluster.
A disabled node will still participate in the cluster and host
clustered TDB records but its public ip address has been taken over by
a different node and it no longer hosts any services.
</para>
</refsect2>
<refsect2><title>enable</title>
<para>
Re-enable a node that has been administratively disabled.
</para>
</refsect2>
<refsect2><title>ban &lt;bantime|0&gt;</title>
<para>
Administratively ban a node for bantime seconds. A bantime of 0 means that the node should be permanently banned.
</para>
<para>
A banned node does not participate in the cluster and does not host any records for the clustered TDB. Its ip address has been taken over by another node and no services are hosted.
</para>
<para>
Nodes are automatically banned if they are the cause of too many
cluster recoveries.
</para>
</refsect2>
<refsect2><title>unban</title>
<para>
This command is used to unban a node that has either been
administratively banned using the ban command or has been automatically
banned by the recovery daemon.
</para>
</refsect2>
<refsect2><title>shutdown</title>
<para>
This command will shutdown a specific CTDB daemon.
</para>
</refsect2>
<refsect2><title>recover</title>
<para>
This command will trigger the recovery daemon to do a cluster
recovery.
</para>
</refsect2>
<refsect2><title>killtcp &lt;srcip:port&gt; &lt;dstip:port&gt;</title>
<para>
This command will kill the specified TCP connection by issuing a
TCP RST to the srcip:port endpoint.
</para>
</refsect2>
<refsect2><title>tickle &lt;srcip:port&gt; &lt;dstip:port&gt;</title>
<para>
This command will send a TCP tickle to the source host for the
specified TCP connection.
A TCP tickle is a TCP ACK packet with an invalid sequence and
acknowledge number and will when received by the source host result
in it sending an immediate correct ACK back to the other end.
</para>
<para>
TCP tickles are useful to "tickle" clients after an IP failover has
occurred since this will make the client immediately recognize the
TCP connection has been disrupted and that the client will need
to reestablish. This greatly speeds up the time it takes for a client
to detect and reestablish after an IP failover in the ctdb cluster.
</para>
</refsect2>
</refsect1>
<refsect1><title>Debugging Commands</title>
<para>
These commands are primarily used for CTDB development and testing and
should not be used for normal administration.
</para>
<refsect2><title>process-exists &lt;pid&gt;</title>
<para>
This command checks if a specific process exists on the CTDB host. This is mainly used by Samba to check if remote instances of samba are still running or not.
</para>
</refsect2>
<refsect2><title>getdbmap</title>
<para>
This command lists all clustered TDB databases that the CTDB daemon has attached to.
</para>
<para>
Example: ctdb getdbmap
</para>
<para>
Example output:
</para>
<screen format="linespecific">
Number of databases:4
dbid:0x42fe72c5 name:locking.tdb path:/var/ctdb/locking.tdb.0
dbid:0x1421fb78 name:brlock.tdb path:/var/ctdb/brlock.tdb.0
dbid:0x17055d90 name:connections.tdb path:/var/ctdb/connections.tdb.0
dbid:0xc0bdde6a name:sessionid.tdb path:/var/ctdb/sessionid.tdb.0
</screen>
</refsect2>
<refsect2><title>catdb &lt;dbname&gt;</title>
<para>
This command will dump a clustered TDB database to the screen. This is a debugging command.
</para>
</refsect2>
<refsect2><title>getmonmode</title>
<para>
This command returns the monitoring mode of a node. The monitoring mode is either ACTIVE or DISABLED. Normally a node will continuously monitor that all other nodes that are expected are in fact connected and that they respond to commands.
</para>
<para>
ACTIVE - This is the normal mode. The node is actively monitoring all other nodes, both that the transport is connected and also that the node responds to commands. If a node becomes unavailable, it will be marked as DISCONNECTED and a recovery is initiated to restore the cluster.
</para>
<para>
DISABLED - This node is not monitoring that other nodes are available. In this mode a node failure will not be detected and no recovery will be performed. This mode is useful when for debugging purposes one wants to attach GDB to a ctdb process but wants to prevent the rest of the cluster from marking this node as DISCONNECTED and do a recovery.
</para>
</refsect2>
<refsect2><title>setmonmode &lt;0|1&gt;</title>
<para>
This command can be used to explicitly disable/enable monitoring mode on a node. The main purpose is if one wants to attach GDB to a running ctdb daemon but wants to prevent the other nodes from marking it as DISCONNECTED and issuing a recovery. To do this, set monitoring mode to 0 on all nodes before attaching with GDB. Remember to set monitoring mode back to 1 afterwards.
</para>
</refsect2>
<refsect2><title>attach &lt;dbname&gt;</title>
<para>
This is a debugging command. This command will make the CTDB daemon create a new CTDB database and attach to it.
</para>
</refsect2>
<refsect2><title>dumpmemory</title>
<para>
This is a debugging command. This command will make the ctdb daemon write a full memory allocation map to the log file.
</para>
</refsect2>
<refsect2><title>freeze</title>
<para>
This command will lock all the local TDB databases causing clients
that are accessing these TDBs such as samba3 to block until the
databases are thawed.
</para>
<para>
This is primarily used by the recovery daemon to stop all samba
daemons from accessing any databases while the database is recovered
and rebuilt.
</para>
</refsect2>
<refsect2><title>thaw</title>
<para>
Thaw a previously frozen node.
</para>
</refsect2>
</refsect1>
<refsect1><title>SEE ALSO</title>
<para>
ctdbd(1), onnode(1)
<ulink url="http://ctdb.samba.org/"/>
</para>
</refsect1>
<refsect1><title>COPYRIGHT/LICENSE</title>
<literallayout>
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
</literallayout>
</refsect1>
</refentry>

View File

@ -1,213 +0,0 @@
.\" Title: ctdbd
.\" Author:
.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
.\" Date: 07/10/2007
.\" Manual:
.\" Source:
.\"
.TH "CTDBD" "1" "07/10/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.SH "NAME"
ctdbd \- The CTDB cluster daemon
.SH "SYNOPSIS"
.HP 6
\fBctdbd\fR
.HP 6
\fBctdbd\fR {\-\-reclock=<filename>} {\-\-nlist=<filename>} {\-\-dbdir=<directory>} [\-?\ \-\-help] [\-\-usage] [\-i\ \-\-interactive] [\-\-public\-addresses=<filename>] [\-\-public\-interface=<interface>] [\-\-event\-script=<filename>] [\-\-logfile=<filename>] [\-\-listen=<address>] [\-\-transport=<STRING>] [\-\-socket=<filename>] [\-d\ \-\-debug=<INTEGER>] [\-\-torture]
.SH "DESCRIPTION"
.PP
ctdbd is the main ctdb daemon.
.PP
ctdbd provides a clustered version of the TDB database with automatic rebuild/recovery of the databases upon node failures.
.PP
Combined with a cluster filesystem ctdbd provides a full HA environment for services such as clustered Samba and NFS as well as other services.
.PP
ctdbd provides monitoring of all nodes in the cluster and automatically reconfigures the cluster and recovers upon node failures.
.PP
ctdbd is the main component in clustered Samba that provides a high\-availability load\-sharing CIFS server cluster.
.SH "OPTIONS"
.PP
\-? \-\-help
.RS 4
Print some help text to the screen.
.RE
.PP
\-\-usage
.RS 4
Print usage information to the screen.
.RE
.PP
\-\-reclock=<filename>
.RS 4
This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery\-master. This file must be stored on shared storage.
.RE
.PP
\-\-nlist=<filename>
.RS 4
This file contains a list of the private ip addresses of every node in the cluster. There is one line/ip address for each node. This file must be the same for all nodes in the cluster.
.sp
This file is usually /etc/ctdb/nodes .
.RE
.PP
\-\-dbdir=<directory>
.RS 4
This is the directory on local storage where ctdbd keeps the local copy of the TDB databases. This directory is local for each node and should not be stored on the shared cluster filesystem.
.sp
This directory would usually be /var/ctdb .
.RE
.PP
\-i \-\-interactive
.RS 4
By default ctdbd will detach itself from the shell and run in the background as a daemon. This option makes ctdbd start in interactive mode.
.RE
.PP
\-\-public\-addresses=<filename>
.RS 4
When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster.
.sp
This is usually the file /etc/ctdb/public_addresses
.RE
.PP
\-\-public\-interface=<interface>
.RS 4
When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to.
.RE
.PP
\-\-event\-script=<filename>
.RS 4
This option is used to specify which events script that ctdbd will use to manage services when the cluster configuration changes.
.sp
This will normally be /etc/ctdb/events which is part of the ctdb distribution.
.RE
.PP
\-\-logfile=<filename>
.RS 4
This is the file where ctdbd will write its log. This is usually /var/log/log.ctdb .
.RE
.PP
\-\-listen=<address>
.RS 4
This specifies which ip address ctdb will bind to. By default ctdbd will bind to the first address it finds in the /etc/ctdb/nodes file and which is also present on the local system in which case you do not need to provide this option.
.sp
This option is only required when you want to run multiple ctdbd daemons/nodes on the same physical host in which case there would be multiple entries in /etc/ctdb/nodes that would match a local interface.
.RE
.PP
\-\-transport=<STRING>
.RS 4
This option specifies which transport to use for ctdbd internode communications. The default is "tcp".
.sp
Supported transports are "tcp" and "infiniband".
.RE
.PP
\-\-socket=<filename>
.RS 4
This specifies the name of the domain socket that ctdbd will create. This socket is used for local clients to attach to and communicate with the ctdbd daemon.
.sp
The default is /tmp/ctdb.socket . You only need to use this option if you plan to run multiple ctdbd daemons on the same physical host.
.RE
.PP
\-d \-\-debug=<DEBUGLEVEL>
.RS 4
This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
.RE
.PP
\-\-torture
.RS 4
This option is only used for development and testing of ctdbd. It adds artificial errors and failures to the common codepaths in ctdbd to verify that ctdbd can recover correctly from failures.
.sp
You do NOT want to use this option unless you are developing and testing new functionality in ctdbd.
.RE
.SH "PRIVATE VS PUBLIC ADDRESSES"
.PP
When used for ip takeover in a HA environment, each node in a ctdb cluster has two ip addresses assigned to it. One private and one public.
.SS "Private address"
.PP
This is the physical ip address of the node which is configured in linux and attached to a physical interface. This address uniquely identifies a physical node in the cluster and is the ip address that ctdbd will use to communicate with the ctdbd daemons on the other nodes in the cluster.
.PP
The private addresses are configured in /etc/ctdb/nodes (unless the \-\-nlist option is used) and contain one line for each node in the cluster. Each line contains the private ip address for one node in the cluster.
.PP
Each node is assigned an internal node number which corresponds to which line in the nodes file that has the local private address of the node.
.PP
Since the private addresses are only available to the network when the corresponding node is up and running you should not use these addresses for clients to connect to services provided by the cluster. Instead client applications should only attach to the public addresses since these are guaranteed to always be available.
Example /etc/ctdb/nodes for a four node cluster:
.sp
.RS 4
.nf
10.1.1.1
10.1.1.2
10.1.1.3
10.1.1.4
.fi
.RE
.SS "Public address"
.PP
A public address on the other hand is not attached to an interface. This address is managed by ctdbd itself and is attached/detached to a physical node at runtime. You should NOT have this address configured to an interface in linux. Let ctdbd manage these addresses.
.PP
The ctdb cluster will assign/reassign these public addresses across the available nodes in the cluster. When one node fails, its public address will be migrated to and taken over by a different node in the cluster to ensure that all public addresses are always available to clients.
.PP
These addresses are not physically attached to a specific node. The 'ctdb ip' command can be used to view the current assignment of public addresses and which physical node is currently serving it.
.PP
By default, each node will when operational always serve its primary public address which is the corresponding line for that node number in the public addresses file. I.e. as long as node X is available and fully operational it will always be the node that serves the corresponding public address.
.PP
The list of public addresses also contains the netmask for each address. The reason for this is that ctdbd needs to know which mask to use when it adds/removes the address from a physical node. This netmask is also used by ctdbd when making decisions on which node should take over a public ip address for a failed node. A node will only be allowed to take over a public address from a different node IFF that public address resides in the same subnet as the primary public address for that node.
Example /etc/ctdb/public_addresses for a four node cluster:
.sp
.RS 4
.nf
11.1.1.1/24
11.1.1.2/24
11.1.2.1/24
11.1.2.2/24
.fi
.RE
.PP
In this example, if node 3 fails, its public address can be taken over by node 2 since node 2 is on the same subnet as 3 but not by node 0 or node 1 since node 0 and 1 are both on a different subnet from node 3.
.SH "NODE STATUS"
.PP
The current status of each node in the cluster can be viewed by the 'ctdb status' command.
.PP
There are five possible states for a node.
.PP
OK \- This node is fully functional.
.PP
DISCONNECTED \- This node could not be connected through the network and is currently not participating in the cluster. If there is a public IP address associated with this node it should have been taken over by a different node. No services are running on this node.
.PP
DISABLED \- This node has been administratively disabled. This node is still functional and participates in the CTDB cluster but its IP addresses have been taken over by a different node and no services are currently being hosted.
.PP
UNHEALTHY \- A service provided by this node is malfunctioning and should be investigated. The CTDB daemon itself is operational and participates in the cluster. Its public IP address has been taken over by a different node and no services are currently being hosted. All unhealthy nodes should be investigated and require an administrative action to rectify.
.PP
BANNED \- This node failed too many recovery attempts and has been banned from participating in the cluster for a period of RecoveryBanPeriod seconds. Any public IP address has been taken over by other nodes. This node does not provide any services. All banned nodes should be investigated and require an administrative action to rectify. This node does not participate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it.
.SH "SEE ALSO"
.PP
ctdb(1), onnode(1)
\fI\%http://ctdb.samba.org/\fR
.SH "COPYRIGHT/LICENSE"
.sp
.RS 4
.nf
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
.fi
.RE

View File

@ -1,182 +0,0 @@
<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>ctdbd</title><meta name="generator" content="DocBook XSL Stylesheets V1.72.0"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" lang="en"><a name="ctdbd.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>ctdbd &#8212; The CTDB cluster daemon</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">ctdbd</code> </p></div><div class="cmdsynopsis"><p><code class="command">ctdbd</code> {--reclock=&lt;filename&gt;} {--nlist=&lt;filename&gt;} {--dbdir=&lt;directory&gt;} [-? --help] [--usage] [-i --interactive] [--public-addresses=&lt;filename&gt;] [--public-interface=&lt;interface&gt;] [--event-script=&lt;filename&gt;] [--logfile=&lt;filename&gt;] [--listen=&lt;address&gt;] [--transport=&lt;STRING&gt;] [--socket=&lt;filename&gt;] [-d --debug=&lt;INTEGER&gt;] [--torture]</p></div></div><div class="refsect1" lang="en"><a name="id2488930"></a><h2>DESCRIPTION</h2><p>
ctdbd is the main ctdb daemon.
</p><p>
ctdbd provides a clustered version of the TDB database with automatic rebuild/recovery of the databases upon node failures.
</p><p>
Combined with a cluster filesystem ctdbd provides a full HA environment for services such as clustered Samba and NFS as well as other services.
</p><p>
ctdbd provides monitoring of all nodes in the cluster and automatically reconfigures the cluster and recovers upon node failures.
</p><p>
ctdbd is the main component in clustered Samba that provides a high-availability load-sharing CIFS server cluster.
</p></div><div class="refsect1" lang="en"><a name="id2488962"></a><h2>OPTIONS</h2><div class="variablelist"><dl><dt><span class="term">-? --help</span></dt><dd><p>
Print some help text to the screen.
</p></dd><dt><span class="term">--usage</span></dt><dd><p>
Print usage information to the screen.
</p></dd><dt><span class="term">--reclock=&lt;filename&gt;</span></dt><dd><p>
This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery-master.
This file must be stored on shared storage.
</p></dd><dt><span class="term">--nlist=&lt;filename&gt;</span></dt><dd><p>
This file contains a list of the private ip addresses of every node in the cluster. There is one line/ip address for each node. This file must be the same for all nodes in the cluster.
</p><p>
This file is usually /etc/ctdb/nodes .
</p></dd><dt><span class="term">--dbdir=&lt;directory&gt;</span></dt><dd><p>
This is the directory on local storage where ctdbd keeps the local
copy of the TDB databases. This directory is local for each node and should not be stored on the shared cluster filesystem.
</p><p>
This directory would usually be /var/ctdb .
</p></dd><dt><span class="term">-i --interactive</span></dt><dd><p>
By default ctdbd will detach itself from the shell and run in
the background as a daemon. This option makes ctdbd start in interactive mode.
</p></dd><dt><span class="term">--public_addresses=&lt;filename&gt;</span></dt><dd><p>
When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster.
</p><p>
This is usually the file /etc/ctdb/public_addresses
</p></dd><dt><span class="term">--public-interface=&lt;interface&gt;</span></dt><dd><p>
When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to.
</p></dd><dt><span class="term">--event-script=&lt;filename&gt;</span></dt><dd><p>
This option is used to specify which events script that ctdbd will
use to manage services when the cluster configuration changes.
</p><p>
This will normally be /etc/ctdb/events which is part of the ctdb distribution.
</p></dd><dt><span class="term">--logfile=&lt;filename&gt;</span></dt><dd><p>
This is the file where ctdbd will write its log. This is usually /var/log/log.ctdb .
</p></dd><dt><span class="term">--listen=&lt;address&gt;</span></dt><dd><p>
This specifies which ip address ctdb will bind to. By default ctdbd will bind to the first address it finds in the /etc/ctdb/nodes file and which is also present on the local system in which case you do not need to provide this option.
</p><p>
This option is only required when you want to run multiple ctdbd daemons/nodes on the same physical host in which case there would be multiple entries in /etc/ctdb/nodes that would match a local interface.
</p></dd><dt><span class="term">--transport=&lt;STRING&gt;</span></dt><dd><p>
This option specifies which transport to use for ctdbd internode communications. The default is "tcp".
</p><p>
Supported transports are "tcp" and "infiniband".
</p></dd><dt><span class="term">--socket=&lt;filename&gt;</span></dt><dd><p>
This specifies the name of the domain socket that ctdbd will create. This socket is used for local clients to attach to and communicate with the ctdbd daemon.
</p><p>
The default is /tmp/ctdb.socket . You only need to use this option if you plan to run multiple ctdbd daemons on the same physical host.
</p></dd><dt><span class="term">-d --debug=&lt;DEBUGLEVEL&gt;</span></dt><dd><p>
This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
</p></dd><dt><span class="term">--torture</span></dt><dd><p>
This option is only used for development and testing of ctdbd. It adds artificial errors and failures to the common codepaths in ctdbd to verify that ctdbd can recover correctly from failures.
</p><p>
You do NOT want to use this option unless you are developing and testing new functionality in ctdbd.
</p></dd></dl></div></div><div class="refsect1" lang="en"><a name="id2490376"></a><h2>Private vs Public addresses</h2><p>
When used for ip takeover in a HA environment, each node in a ctdb
cluster has two ip addresses assigned to it. One private and one public.
</p><div class="refsect2" lang="en"><a name="id2490386"></a><h3>Private address</h3><p>
This is the physical ip address of the node which is configured in
linux and attached to a physical interface. This address uniquely
identifies a physical node in the cluster and is the ip address
that ctdbd will use to communicate with the ctdbd daemons on the
other nodes in the cluster.
</p><p>
The private addresses are configured in /etc/ctdb/nodes
(unless the --nlist option is used) and contain one line for each
node in the cluster. Each line contains the private ip address for one
node in the cluster.
</p><p>
Each node is assigned an internal node number which corresponds to
which line in the nodes file that has the local private address
of the node.
</p><p>
Since the private addresses are only available to the network when the
corresponding node is up and running you should not use these addresses
for clients to connect to services provided by the cluster. Instead
client applications should only attach to the public addresses since
these are guaranteed to always be available.
</p>
Example /etc/ctdb/nodes for a four node cluster:
<pre class="screen">
10.1.1.1
10.1.1.2
10.1.1.3
10.1.1.4
</pre></div><div class="refsect2" lang="en"><a name="id2490432"></a><h3>Public address</h3><p>
A public address on the other hand is not attached to an interface.
This address is managed by ctdbd itself and is attached/detached to
a physical node at runtime. You should NOT have this address configured
to an interface in linux. Let ctdbd manage these addresses.
</p><p>
The ctdb cluster will assign/reassign these public addresses across the
available nodes in the cluster. When one node fails, its public address
will be migrated to and taken over by a different node in the cluster
to ensure that all public addresses are always available to clients.
</p><p>
These addresses are not physically attached to a specific node.
The 'ctdb ip' command can be used to view the current assignment of
public addresses and which physical node is currently serving it.
</p><p>
By default, each node will when operational always serve its primary
public address which is the corresponding line for that node number
in the public addresses file. I.e. as long as node X is available and
fully operational it will always be the node that serves the
corresponding public address.
</p><p>
The list of public addresses also contains the netmask for each address.
The reason for this is that ctdbd needs to know which mask to use
when it adds/removes the address from a physical node. This netmask
is also used by ctdbd when making decisions on which node should take
over a public ip address for a failed node.
A node will only be allowed to take over a public address from a
different node IFF that public address resides in the same subnet
as the primary public address for that node.
</p>
Example /etc/ctdb/public_addresses for a four node cluster:
<pre class="screen">
11.1.1.1/24
11.1.1.2/24
11.1.2.1/24
11.1.2.2/24
</pre><p>
In this example, if node 3 fails, its public address can be taken over
by node 2 since node 2 is on the same subnet as 3 but not by node 0 or
node 1 since node 0 and 1 are both on a different subnet from node 3.
</p></div></div><div class="refsect1" lang="en"><a name="id2536612"></a><h2>Node status</h2><p>
The current status of each node in the cluster can be viewed by the
'ctdb status' command.
</p><p>
There are five possible states for a node.
</p><p>
OK - This node is fully functional.
</p><p>
DISCONNECTED - This node could not be connected through the network
and is currently not participating in the cluster. If there is a
public IP address associated with this node it should have been taken
over by a different node. No services are running on this node.
</p><p>
DISABLED - This node has been administratively disabled. This node is
still functional and participates in the CTDB cluster but its IP
addresses have been taken over by a different node and no services are
currently being hosted.
</p><p>
UNHEALTHY - A service provided by this node is malfunctioning and should
be investigated. The CTDB daemon itself is operational and participates
in the cluster. Its public IP address has been taken over by a different
node and no services are currently being hosted. All unhealthy nodes
should be investigated and require an administrative action to rectify.
</p><p>
BANNED - This node failed too many recovery attempts and has been banned
from participating in the cluster for a period of RecoveryBanPeriod
seconds. Any public IP address has been taken over by other nodes. This
node does not provide any services. All banned nodes should be
investigated and require an administrative action to rectify. This node
does not participate in the CTDB cluster but can still be communicated
with. I.e. ctdb commands can be sent to it.
</p></div><div class="refsect1" lang="en"><a name="id2536669"></a><h2>SEE ALSO</h2><p>
ctdb(1), onnode(1)
<a href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
</p></div><div class="refsect1" lang="en"><a name="id2536682"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
Copyright (C) Andrew Tridgell 2007<br>
Copyright (C) Ronnie sahlberg 2007<br>
<br>
This program is free software; you can redistribute it and/or modify<br>
it under the terms of the GNU General Public License as published by<br>
the Free Software Foundation; either version 3 of the License, or (at<br>
your option) any later version.<br>
<br>
This program is distributed in the hope that it will be useful, but<br>
WITHOUT ANY WARRANTY; without even the implied warranty of<br>
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU<br>
General Public License for more details.<br>
<br>
You should have received a copy of the GNU General Public License<br>
along with this program; if not, see http://www.gnu.org/licenses/.<br>
</p></div></div></div></body></html>

View File

@ -1,379 +0,0 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
<refentry id="ctdbd.1">
<refmeta>
<refentrytitle>ctdbd</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>
<refnamediv>
<refname>ctdbd</refname>
<refpurpose>The CTDB cluster daemon</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>ctdbd</command>
</cmdsynopsis>
<cmdsynopsis>
<command>ctdbd</command>
<arg choice="req">--reclock=&lt;filename&gt;</arg>
<arg choice="req">--nlist=&lt;filename&gt;</arg>
<arg choice="req">--dbdir=&lt;directory&gt;</arg>
<arg choice="opt">-? --help</arg>
<arg choice="opt">--usage</arg>
<arg choice="opt">-i --interactive</arg>
<arg choice="opt">--public-addresses=&lt;filename&gt;</arg>
<arg choice="opt">--public-interface=&lt;interface&gt;</arg>
<arg choice="opt">--event-script=&lt;filename&gt;</arg>
<arg choice="opt">--logfile=&lt;filename&gt;</arg>
<arg choice="opt">--listen=&lt;address&gt;</arg>
<arg choice="opt">--transport=&lt;STRING&gt;</arg>
<arg choice="opt">--socket=&lt;filename&gt;</arg>
<arg choice="opt">-d --debug=&lt;INTEGER&gt;</arg>
<arg choice="opt">--torture</arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1><title>DESCRIPTION</title>
<para>
ctdbd is the main ctdb daemon.
</para>
<para>
ctdbd provides a clustered version of the TDB database with automatic rebuild/recovery of the databases upon node failures.
</para>
<para>
Combined with a cluster filesystem ctdbd provides a full HA environment for services such as clustered Samba and NFS as well as other services.
</para>
<para>
ctdbd provides monitoring of all nodes in the cluster and automatically reconfigures the cluster and recovers upon node failures.
</para>
<para>
ctdbd is the main component in clustered Samba that provides a high-availability load-sharing CIFS server cluster.
</para>
</refsect1>
<refsect1>
<title>OPTIONS</title>
<variablelist>
<varlistentry><term>-? --help</term>
<listitem>
<para>
Print some help text to the screen.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--usage</term>
<listitem>
<para>
Print usage information to the screen.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--reclock=&lt;filename&gt;</term>
<listitem>
<para>
This is the name of the lock file stored on the shared cluster filesystem that ctdbd uses to arbitrate which node has the role of recovery-master.
This file must be stored on shared storage.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--nlist=&lt;filename&gt;</term>
<listitem>
<para>
This file contains a list of the private ip addresses of every node in the cluster. There is one line/ip address for each node. This file must be the same for all nodes in the cluster.
</para>
<para>
This file is usually /etc/ctdb/nodes .
</para>
</listitem>
</varlistentry>
<varlistentry><term>--dbdir=&lt;directory&gt;</term>
<listitem>
<para>
This is the directory on local storage where ctdbd keeps the local
copy of the TDB databases. This directory is local for each node and should not be stored on the shared cluster filesystem.
</para>
<para>
This directory would usually be /var/ctdb .
</para>
</listitem>
</varlistentry>
<varlistentry><term>-i --interactive</term>
<listitem>
<para>
By default ctdbd will detach itself from the shell and run in
the background as a daemon. This option makes ctdbd start in interactive mode.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--public-addresses=&lt;filename&gt;</term>
<listitem>
<para>
When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster.
</para>
<para>
This is usually the file /etc/ctdb/public_addresses
</para>
</listitem>
</varlistentry>
<varlistentry><term>--public-interface=&lt;interface&gt;</term>
<listitem>
<para>
When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--event-script=&lt;filename&gt;</term>
<listitem>
<para>
This option is used to specify which events script that ctdbd will
use to manage services when the cluster configuration changes.
</para>
<para>
This will normally be /etc/ctdb/events which is part of the ctdb distribution.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--logfile=&lt;filename&gt;</term>
<listitem>
<para>
This is the file where ctdbd will write its log. This is usually /var/log/log.ctdb .
</para>
</listitem>
</varlistentry>
<varlistentry><term>--listen=&lt;address&gt;</term>
<listitem>
<para>
This specifies which ip address ctdb will bind to. By default ctdbd will bind to the first address it finds in the /etc/ctdb/nodes file and which is also present on the local system in which case you do not need to provide this option.
</para>
<para>
This option is only required when you want to run multiple ctdbd daemons/nodes on the same physical host in which case there would be multiple entries in /etc/ctdb/nodes that would match a local interface.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--transport=&lt;STRING&gt;</term>
<listitem>
<para>
This option specifies which transport to use for ctdbd internode communications. The default is "tcp".
</para>
<para>
Supported transports are "tcp" and "infiniband".
</para>
</listitem>
</varlistentry>
<varlistentry><term>--socket=&lt;filename&gt;</term>
<listitem>
<para>
This specifies the name of the domain socket that ctdbd will create. This socket is used for local clients to attach to and communicate with the ctdbd daemon.
</para>
<para>
The default is /tmp/ctdb.socket . You only need to use this option if you plan to run multiple ctdbd daemons on the same physical host.
</para>
</listitem>
</varlistentry>
<varlistentry><term>-d --debug=&lt;DEBUGLEVEL&gt;</term>
<listitem>
<para>
This option sets the debuglevel on the ctdbd daemon which controls what will be written to the logfile. The default is 0 which will only log important events and errors. A larger number will provide additional logging.
</para>
</listitem>
</varlistentry>
<varlistentry><term>--torture</term>
<listitem>
<para>
This option is only used for development and testing of ctdbd. It adds artificial errors and failures to the common codepaths in ctdbd to verify that ctdbd can recover correctly from failures.
</para>
<para>
You do NOT want to use this option unless you are developing and testing new functionality in ctdbd.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1><title>Private vs Public addresses</title>
<para>
When used for ip takeover in a HA environment, each node in a ctdb
cluster has two ip addresses assigned to it. One private and one public.
</para>
<refsect2><title>Private address</title>
<para>
This is the physical ip address of the node which is configured in
linux and attached to a physical interface. This address uniquely
identifies a physical node in the cluster and is the ip address
that ctdbd will use to communicate with the ctdbd daemons on the
other nodes in the cluster.
</para>
<para>
The private addresses are configured in /etc/ctdb/nodes
(unless the --nlist option is used) and contain one line for each
node in the cluster. Each line contains the private ip address for one
node in the cluster.
</para>
<para>
Each node is assigned an internal node number which corresponds to
which line in the nodes file that has the local private address
of the node.
</para>
<para>
Since the private addresses are only available to the network when the
corresponding node is up and running you should not use these addresses
for clients to connect to services provided by the cluster. Instead
client applications should only attach to the public addresses since
these are guaranteed to always be available.
</para>
Example /etc/ctdb/nodes for a four node cluster:
<screen format="linespecific">
10.1.1.1
10.1.1.2
10.1.1.3
10.1.1.4
</screen>
</refsect2>
<refsect2><title>Public address</title>
<para>
A public address on the other hand is not attached to an interface.
This address is managed by ctdbd itself and is attached/detached to
a physical node at runtime. You should NOT have this address configured
to an interface in linux. Let ctdbd manage these addresses.
</para>
<para>
The ctdb cluster will assign/reassign these public addresses across the
available nodes in the cluster. When one node fails, its public address
will be migrated to and taken over by a different node in the cluster
to ensure that all public addresses are always available to clients.
</para>
<para>
These addresses are not physically attached to a specific node.
The 'ctdb ip' command can be used to view the current assignment of
public addresses and which physical node is currently serving it.
</para>
<para>
By default, each node will when operational always serve its primary
public address which is the corresponding line for that node number
in the public addresses file. I.e. as long as node X is available and
fully operational it will always be the node that serves the
corresponding public address.
</para>
<para>
The list of public addresses also contains the netmask for each address.
The reason for this is that ctdbd needs to know which mask to use
when it adds/removes the address from a physical node. This netmask
is also used by ctdbd when making decisions on which node should take
over a public ip address for a failed node.
A node will only be allowed to take over a public address from a
different node IFF that public address resides in the same subnet
as the primary public address for that node.
</para>
Example /etc/ctdb/public_addresses for a four node cluster:
<screen format="linespecific">
11.1.1.1/24
11.1.1.2/24
11.1.2.1/24
11.1.2.2/24
</screen>
<para>
In this example, if node 3 fails, its public address can be taken over
by node 2 since node 2 is on the same subnet as 3 but not by node 0 or
node 1 since node 0 and 1 are both on a different subnet from node 3.
</para>
</refsect2>
</refsect1>
<refsect1><title>Node status</title>
<para>
The current status of each node in the cluster can be viewed by the
'ctdb status' command.
</para>
<para>
There are five possible states for a node.
</para>
<para>
OK - This node is fully functional.
</para>
<para>
DISCONNECTED - This node could not be connected through the network
and is currently not participating in the cluster. If there is a
public IP address associated with this node it should have been taken
over by a different node. No services are running on this node.
</para>
<para>
DISABLED - This node has been administratively disabled. This node is
still functional and participates in the CTDB cluster but its IP
addresses have been taken over by a different node and no services are
currently being hosted.
</para>
<para>
UNHEALTHY - A service provided by this node is malfunctioning and should
be investigated. The CTDB daemon itself is operational and participates
in the cluster. Its public IP address has been taken over by a different
node and no services are currently being hosted. All unhealthy nodes
should be investigated and require an administrative action to rectify.
</para>
<para>
BANNED - This node failed too many recovery attempts and has been banned
from participating in the cluster for a period of RecoveryBanPeriod
seconds. Any public IP address has been taken over by other nodes. This
node does not provide any services. All banned nodes should be
investigated and require an administrative action to rectify. This node
does not participate in the CTDB cluster but can still be communicated
with. I.e. ctdb commands can be sent to it.
</para>
</refsect1>
<refsect1><title>SEE ALSO</title>
<para>
ctdb(1), onnode(1)
<ulink url="http://ctdb.samba.org/"/>
</para>
</refsect1>
<refsect1><title>COPYRIGHT/LICENSE</title>
<literallayout>
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
</literallayout>
</refsect1>
</refentry>

View File

@ -1,79 +0,0 @@
.\" Title: onnode
.\" Author:
.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>
.\" Date: 07/10/2007
.\" Manual:
.\" Source:
.\"
.TH "ONNODE" "1" "07/10/2007" "" ""
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.SH "NAME"
onnode \- run commands on ctdb nodes
.SH "SYNOPSIS"
.HP 24
\fBonnode NODE COMMAND ...\fR
.SH "DESCRIPTION"
.PP
onnode is a utility to run commands on a specific node of a CTDB cluster, or on all nodes.
.PP
The NODE option specifies which node to run a command on. You can specify a numeric node number (from 0 to N\-1) or the special node 'all'.
.PP
The COMMAND can be any shell command. The onnode utility uses ssh or rsh to connect to the remote nodes and run the command.
.PP
If the COMMAND starts with a /, then the command is run via the 'at' service. Otherwise the command is run in the foreground.
.SH "EXAMPLES"
.PP
The following command would show the process ID of ctdb on all nodes
.sp
.RS 4
.nf
onnode all pidof ctdbd
.fi
.RE
.PP
The following command would show the last 5 lines of log on each node, preceded by the node's hostname
.sp
.RS 4
.nf
onnode all "hostname; tail \-5 /var/log/log.ctdb"
.fi
.RE
.PP
The following command would restart the ctdb service on all nodes.
.sp
.RS 4
.nf
onnode all service ctdb restart
.fi
.RE
.SH "SEE ALSO"
.PP
ctdbd(1), ctdb(1),
\fI\%http://ctdb.samba.org/\fR
.SH "COPYRIGHT/LICENSE"
.sp
.RS 4
.nf
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
.fi
.RE

View File

@ -1,45 +0,0 @@
<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>onnode</title><meta name="generator" content="DocBook XSL Stylesheets V1.72.0"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" lang="en"><a name="onnode.1"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>onnode &#8212; run commands on ctdb nodes</p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><div class="cmdsynopsis"><p><code class="command">onnode NODE COMMAND ...</code> </p></div></div><div class="refsect1" lang="en"><a name="id2490457"></a><h2>DESCRIPTION</h2><p>
onnode is a utility to run commands on a specific node of a CTDB
cluster, or on all nodes.
</p><p>
The NODE option specifies which node to run a command on. You
can specify a numeric node number (from 0 to N-1) or the special
node 'all'.
</p><p>
The COMMAND can be any shell command. The onnode utility uses
ssh or rsh to connect to the remote nodes and run the command.
</p><p>
If the COMMAND starts with a /, then the command is run via the
'at' service. Otherwise the command is run in the foreground.
</p></div><div class="refsect1" lang="en"><a name="id2487793"></a><h2>EXAMPLES</h2><p>
The following command would show the process ID of ctdb on all nodes
</p><pre class="screen">
onnode all pidof ctdbd
</pre><p>
The following command would show the last 5 lines of log on each
node, preceded by the node's hostname
</p><pre class="screen">
onnode all "hostname; tail -5 /var/log/log.ctdb"
</pre><p>
The following command would restart the ctdb service on all nodes.
</p><pre class="screen">
onnode all service ctdb restart
</pre></div><div class="refsect1" lang="en"><a name="id2488691"></a><h2>SEE ALSO</h2><p>
ctdbd(1), ctdb(1), <a href="http://ctdb.samba.org/" target="_top">http://ctdb.samba.org/</a>
</p></div><div class="refsect1" lang="en"><a name="id2488704"></a><h2>COPYRIGHT/LICENSE</h2><div class="literallayout"><p><br>
Copyright (C) Andrew Tridgell 2007<br>
Copyright (C) Ronnie sahlberg 2007<br>
<br>
This program is free software; you can redistribute it and/or modify<br>
it under the terms of the GNU General Public License as published by<br>
the Free Software Foundation; either version 3 of the License, or (at<br>
your option) any later version.<br>
<br>
This program is distributed in the hope that it will be useful, but<br>
WITHOUT ANY WARRANTY; without even the implied warranty of<br>
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU<br>
General Public License for more details.<br>
<br>
You should have received a copy of the GNU General Public License<br>
along with this program; if not, see http://www.gnu.org/licenses/.<br>
</p></div></div></div></body></html>

View File

@ -1,91 +0,0 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
<refentry id="onnode.1">
<refmeta>
<refentrytitle>onnode</refentrytitle>
<manvolnum>1</manvolnum>
</refmeta>
<refnamediv>
<refname>onnode</refname>
<refpurpose>run commands on ctdb nodes</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>onnode NODE COMMAND ...</command>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1><title>DESCRIPTION</title>
<para>
onnode is a utility to run commands on a specific node of a CTDB
cluster, or on all nodes.
</para>
<para>
The NODE option specifies which node to run a command on. You
can specify a numeric node number (from 0 to N-1) or the special
node 'all'.
</para>
<para>
The COMMAND can be any shell command. The onnode utility uses
ssh or rsh to connect to the remote nodes and run the command.
</para>
<para>
If the COMMAND starts with a /, then the command is run via the
'at' service. Otherwise the command is run in the foreground.
</para>
</refsect1>
<refsect1><title>EXAMPLES</title>
<para>
The following command would show the process ID of ctdb on all nodes
</para>
<screen format="linespecific">
onnode all pidof ctdbd
</screen>
<para>
The following command would show the last 5 lines of log on each
node, preceded by the node's hostname
</para>
<screen format="linespecific">
onnode all "hostname; tail -5 /var/log/log.ctdb"
</screen>
<para>
The following command would restart the ctdb service on all nodes.
</para>
<screen format="linespecific">
onnode all service ctdb restart
</screen>
</refsect1>
<refsect1><title>SEE ALSO</title>
<para>
ctdbd(1), ctdb(1), <ulink url="http://ctdb.samba.org/"/>
</para>
</refsect1>
<refsect1><title>COPYRIGHT/LICENSE</title>
<literallayout>
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see http://www.gnu.org/licenses/.
</literallayout>
</refsect1>
</refentry>

View File

@ -1,12 +0,0 @@
#!/bin/sh
# Restart a two-node local test setup, running each samba instance in its
# own xterm. The optional first argument is a command prefix placed in
# front of bin/samba (it may consist of several words).

# Stop earlier instances: plain signal first, SIGKILL one second later.
killall samba
sleep 1
killall -9 samba
killall -9 valgrind

# Intentionally expanded unquoted below so a multi-word prefix is split
# into separate arguments.
launcher=$1
etc_dir=/home/tridge/samba/samba4.svn/prefix/etc

# One backgrounded xterm per node, each with its own smb.conf.<node>.
for node in node1 node2; do
	xterm -e $launcher bin/samba -s $etc_dir/smb.conf.$node -M single -i &
done

View File

@ -1,19 +0,0 @@
Here is a very brief howto for setting up a simple test of the current
ctdb code.
1) put smb.conf.cluster, smb.conf.node1, smb.conf.node2 and nlist.txt in the
prefix/etc/ directory for samba4. Adjust them for your local paths.
2) use the script cluster_start.sh to start the two nodes. You can
pass extra arguments to start under valgrind or gdb. For example:
cluster_start.sh "gdb --args"
3) test using the following command:
bin/locktest //localhost/test //win2003/test -Uadministrator%password -l unclist.txt
with the unclist.txt supplied
NOTE: This is very much experimental code!

View File

@ -1,2 +0,0 @@
127.0.0.1:9001
127.0.0.2:9001

View File

@ -1,26 +0,0 @@
ctdb:shared data = /home/tridge/samba/samba4.svn/prefix/cluster
ctdb:maxlacount = 7
ctdb:brlock = true
; ctdb:selfconnect = true
ctdb:nlist = /home/tridge/samba/samba4.svn/prefix/etc/nlist.txt
dos charset = ascii
unix charset = utf8
display charset = utf8
netbios name = blu
workgroup = bludom
realm = bludom.tridgell.net
panic action = backtrace %PID% %PROG%
js include = /home/tridge/samba/samba4.svn/source/scripting/libjs
setup directory = /home/tridge/samba/samba4.svn/source/setup
tls enabled = yes
server role = domain controller
posix:sharedelay = 100000
server services = smb
bind interfaces only = true
name resolve order = bcast
smb ports = 445
[test]
ntvfs handler = unixuid posix
path = /home/tridge/prefix/testdir
read only = No

View File

@ -1,6 +0,0 @@
ctdb:address = 127.0.0.1:9001
interfaces = lo
log file = /home/tridge/samba/samba4.svn/prefix/var/log.node1
pid directory = /home/tridge/samba/samba4.svn/prefix/var/run/node1
lock directory = /home/tridge/samba/samba4.svn/prefix/var/run/locks/node1
include = /home/tridge/samba/samba4.svn/prefix/etc/smb.conf.cluster

View File

@ -1,6 +0,0 @@
ctdb:address = 127.0.0.2:9001
interfaces = lo:2
log file = /home/tridge/samba/samba4.svn/prefix/var/log.node2
pid directory = /home/tridge/samba/samba4.svn/prefix/var/run/node2
lock directory = /home/tridge/samba/samba4.svn/prefix/var/run/locks/node1
include = /home/tridge/samba/samba4.svn/prefix/etc/smb.conf.cluster

View File

@ -1,2 +0,0 @@
\\127.0.0.1\test
\\127.0.0.2\test

View File

@ -1,19 +0,0 @@
Compilation
===========
For the configure script, please set the OFED include & library path by e.g.:
export CFLAGS="-I/usr/local/ofed/include -L/usr/local/ofed/lib"
Then run:
./configure --enable-infiniband
Example for testing
===================
bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.1
bin/ctdb_test --transport ib --nlist ../2nodes_rm.txt --listen 10.0.0.2
where 2nodes_rm.txt:
10.0.0.1
10.0.0.2

View File

@ -1,31 +0,0 @@
dnl Optional infiniband transport, switched on with --enable-infiniband.
dnl The first argument of AC_ARG_ENABLE is the bare feature name; configure
dnl then exposes the user's choice as $enable_infiniband.
AC_ARG_ENABLE(infiniband,
[  --enable-infiniband         Turn on infiniband support (default=no)])

HAVE_INFINIBAND=no

if eval "test x$enable_infiniband = xyes"; then
	AC_DEFINE(USE_INFINIBAND,1,[Use infiniband])
	HAVE_INFINIBAND=yes

	INFINIBAND_WRAPPER_OBJ="ib/ibwrapper.o ib/ibw_ctdb.o ib/ibw_ctdb_init.o"
	INFINIBAND_LIBS="-lrdmacm -libverbs"
	INFINIBAND_BINS="bin/ibwrapper_test"

	dnl Every header and library below is mandatory once infiniband is
	dnl requested. AC_MSG_ERROR aborts configure with a valid exit status
	dnl ("exit -1" is rejected by some POSIX shells).
	AC_CHECK_HEADERS(infiniband/verbs.h, [], [
		AC_MSG_ERROR([you need infiniband/verbs.h when ib enabled!])])
	AC_CHECK_HEADERS(rdma/rdma_cma.h, [], [
		AC_MSG_ERROR([you need rdma/rdma_cma.h when ib enabled!])])
	AC_CHECK_LIB(ibverbs, ibv_create_qp, [], [
		AC_MSG_ERROR([you need libibverbs when ib enabled!])])
	AC_CHECK_LIB(rdmacm, rdma_connect, [], [
		AC_MSG_ERROR([you need librdmacm when ib enabled!])])
fi

AC_SUBST(HAVE_INFINIBAND)
AC_SUBST(INFINIBAND_WRAPPER_OBJ)
AC_SUBST(INFINIBAND_LIBS)
AC_SUBST(INFINIBAND_BINS)

View File

@ -1,174 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Join infiniband wrapper and ctdb.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include <system/network.h>
#include <assert.h>
#include "ctdb_private.h"
#include "ibwrapper.h"
#include "ibw_ctdb.h"
/*
 * Turn a numeric IPv4 string or a host name into a struct in_addr.
 *
 * ctdb:    context handed to ctdb_set_error() when the lookup fails
 * address: dotted-quad IPv4 address or a host name
 * addr:    receives the resolved IPv4 address
 *
 * Returns 0 on success, -1 when the name does not resolve to an IPv4 address.
 */
int ctdb_ibw_get_address(struct ctdb_context *ctdb,
	const char *address, struct in_addr *addr)
{
	if (inet_pton(AF_INET, address, addr) <= 0) {
		/* not a numeric address - fall back to a host name lookup */
		struct hostent *he = gethostbyname(address);

		/* only accept IPv4 results that fit into *addr */
		if (he == NULL || he->h_addrtype != AF_INET ||
		    he->h_length > sizeof(*addr)) {
			ctdb_set_error(ctdb, "invalid network address '%s'\n",
				       address);
			return -1;
		}
		memcpy(addr, he->h_addr, he->h_length);
	}
	return 0;
}
/*
 * Kick off an outgoing connection to <node>.
 *
 * Returns -1 only when the node's address cannot be resolved. A failing
 * ibw_connect() is not reported to the caller: a retry is scheduled one
 * second later and 0 is returned.
 */
int ctdb_ibw_node_connect(struct ctdb_node *node)
{
	struct ctdb_ibw_node *ibw_node =
		talloc_get_type(node->private_data, struct ctdb_ibw_node);
	struct sockaddr_in peer;

	assert(ibw_node!=NULL);
	assert(ibw_node->conn!=NULL);

	memset(&peer, 0, sizeof(peer));
	peer.sin_family = PF_INET;
	peer.sin_port = htons(node->address.port);

	if (ctdb_ibw_get_address(node->ctdb, node->address.address, &peer.sin_addr) != 0) {
		DEBUG(0, ("ctdb_ibw_node_connect failed\n"));
		return -1;
	}

	if (ibw_connect(ibw_node->conn, &peer, node) != 0) {
		DEBUG(0, ("ctdb_ibw_node_connect/ibw_connect failed - retrying...\n"));
		/* try again once a second */
		event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0),
				ctdb_ibw_node_connect_event, node);
	}

	/* on success the work continues in the IBWC_CONNECTED state callback */
	return 0;
}
/*
 * Timed-event trampoline: <private_data> is the struct ctdb_node to
 * (re)connect. The remaining arguments are ignored.
 */
void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te,
	struct timeval t, void *private_data)
{
	ctdb_ibw_node_connect(talloc_get_type(private_data, struct ctdb_node));
}
/*
 * State-change callback handed to ibw_init() by ctdb_ibw_init().
 *
 * ctx:  non-NULL when the context state changed
 * conn: non-NULL when a connection state changed
 *
 * Returns 0, or -1 when accepting an incoming connection request fails.
 */
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
{
if (ctx!=NULL) {
/* ctx->state changed */
switch(ctx->state) {
case IBWS_INIT: /* ctx start - after ibw_init */
break;
case IBWS_READY: /* after ibw_bind & ibw_listen */
break;
case IBWS_CONNECT_REQUEST: /* after [IBWS_READY + incoming request] */
/* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */
/* accepted connections carry no userdata (NULL), which is how the
 * IBWC_* cases below tell them apart from outgoing connections */
if (ibw_accept(ctx, conn, NULL)) {
DEBUG(0, ("connstate_handler/ibw_accept failed\n"));
return -1;
} /* else continue in IBWC_CONNECTED */
break;
case IBWS_STOPPED: /* normal stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */
/* TODO: have a CTDB upcall for which CTDB should wait in a (final) loop */
break;
case IBWS_ERROR: /* abnormal state; ibw_stop must be called after this */
break;
default:
assert(0);
break;
}
}
if (conn!=NULL) {
/* conn->state changed */
switch(conn->state) {
case IBWC_INIT: /* conn start - internal state */
break;
case IBWC_CONNECTED: { /* after ibw_accept or ibw_connect */
struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
if (node!=NULL) { /* after ibw_connect */
struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
node->ctdb->upcalls->node_connected(node);
/* send whatever was queued while the connection was down */
ctdb_flush_cn_queue(cn);
} else { /* after ibw_accept */
/* NOP in CTDB case */
}
} break;
case IBWC_DISCONNECTED: { /* after ibw_disconnect */
struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
if (node!=NULL)
node->ctdb->upcalls->node_dead(node);
talloc_free(conn);
/* normal + intended disconnect => not reconnecting in this layer */
} break;
case IBWC_ERROR: {
struct ctdb_node *node = talloc_get_type(conn->conn_userdata, struct ctdb_node);
if (node!=NULL) {
struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
/* remember the context before the connection is freed */
struct ibw_ctx *ictx = cn->conn->ctx;
DEBUG(10, ("IBWC_ERROR, reconnecting...\n"));
talloc_free(cn->conn); /* internal queue content is destroyed */
/* NOTE(review): the result of ibw_conn_new() is not checked for NULL here */
cn->conn = (void *)ibw_conn_new(ictx, node);
/* retry the connect one second from now */
event_add_timed(node->ctdb->ev, node, timeval_current_ofs(1, 0),
ctdb_ibw_node_connect_event, node);
}
} break;
default:
assert(0);
break;
}
}
return 0;
}
/*
 * Receive callback handed to ibw_init() by ctdb_ibw_init().
 *
 * conn: connection the message arrived on (must be IBWC_CONNECTED)
 * buf:  message bytes; only valid for the duration of this call
 * n:    message length in bytes
 *
 * The message is copied into a talloc buffer owned by <conn> and passed to
 * the ctdb recv_pkt upcall. Returns 0 on success, -1 if the copy cannot be
 * allocated.
 */
int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n)
{
	struct ctdb_context *ctdb;
	void *buf2; /* future TODO: a solution for removal of this */

	/* validate conn before it is dereferenced */
	assert(conn!=NULL);
	assert(buf!=NULL);
	assert(conn->state==IBWC_CONNECTED);

	ctdb = talloc_get_type(conn->ctx->ctx_userdata, struct ctdb_context);
	assert(ctdb!=NULL);

	/* so far "buf" is an ib-registered memory area
	 * and being reused for next receive
	 * noticed that HL requires talloc-ed memory to be stolen */
	buf2 = talloc_memdup(conn, buf, n);
	if (buf2 == NULL) {
		DEBUG(0, ("ctdb_ibw_receive_handler: out of memory\n"));
		return -1;
	}

	ctdb->upcalls->recv_pkt(ctdb, (uint8_t *)buf2, (uint32_t)n);

	return 0;
}

View File

@ -1,50 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Join infiniband wrapper and ctdb.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* One packet parked on a node's queue until its connection is up. */
struct ctdb_ibw_msg {
uint8_t *data; /* copy of the packet; allocated as a talloc child of this message in ctdb_ibw_queue_pkt */
uint32_t length; /* number of bytes in data */
struct ctdb_ibw_msg *prev; /* list links used by the DLIST_* macros */
struct ctdb_ibw_msg *next;
};
/* Per-node infiniband state, stored in ctdb_node->private_data. */
struct ctdb_ibw_node {
struct ibw_conn *conn; /* connection created by ibw_conn_new for this node */
struct ctdb_ibw_msg *queue; /* head of the list of packets waiting to be sent */
struct ctdb_ibw_msg *queue_last; /* most recently queued packet */
int qcnt; /* number of packets currently on the queue */
};
/* Resolve an IPv4 address string or host name; 0 on success, -1 on failure. */
int ctdb_ibw_get_address(struct ctdb_context *ctdb,
const char *address, struct in_addr *addr);
/* Callbacks passed to ibw_init() by ctdb_ibw_init(). */
int ctdb_ibw_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn);
int ctdb_ibw_receive_handler(struct ibw_conn *conn, void *buf, int n);
/* Start connecting to a node; the _event variant is the timed-event wrapper used for retries. */
int ctdb_ibw_node_connect(struct ctdb_node *node);
void ctdb_ibw_node_connect_event(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data);
/* Send every queued packet of a node; -1 as soon as one send fails. */
int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn);
/* Create the ibw context and install the infiniband method table into ctdb. */
int ctdb_ibw_init(struct ctdb_context *ctdb);

View File

@ -1,237 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Join infiniband wrapper and ctdb.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include <system/network.h>
#include <assert.h>
#include "ctdb_private.h"
#include "ibwrapper.h"
#include "ibw_ctdb.h"
/*
 * Bind the ibw context to this node's own address and start listening.
 * Returns 0 on success, -1 if address resolution, bind or listen fails.
 */
static int ctdb_ibw_listen(struct ctdb_context *ctdb, int backlog)
{
	struct sockaddr_in local;
	struct ibw_ctx *ibw = talloc_get_type(ctdb->private_data, struct ibw_ctx);

	assert(ibw!=NULL);

	memset(&local, 0, sizeof(local));
	local.sin_family = PF_INET;
	local.sin_port = htons(ctdb->address.port);
	if (ctdb_ibw_get_address(ctdb, ctdb->address.address, &local.sin_addr) != 0) {
		return -1;
	}

	if (ibw_bind(ibw, &local) != 0) {
		DEBUG(0, ("ctdb_ibw_listen: ibw_bind failed\n"));
		return -1;
	}

	if (ibw_listen(ibw, backlog) != 0) {
		DEBUG(0, ("ctdb_ibw_listen: ibw_listen failed\n"));
		return -1;
	}

	return 0;
}
/*
 * initialise ibw portion of a ctdb node
 *
 * Allocates the per-node ctdb_ibw_node (talloc child of <node>), creates its
 * connection object and stores the structure in node->private_data.
 * Returns 0 on success, -1 if either allocation fails.
 */
static int ctdb_ibw_add_node(struct ctdb_node *node)
{
	struct ibw_ctx *ictx = talloc_get_type(node->ctdb->private_data, struct ibw_ctx);
	struct ctdb_ibw_node *cn = talloc_zero(node, struct ctdb_ibw_node);

	/* report allocation failure instead of relying on assert(),
	 * which is compiled out with NDEBUG */
	if (cn == NULL) {
		DEBUG(0, ("ctdb_ibw_add_node: out of memory\n"));
		return -1;
	}

	cn->conn = ibw_conn_new(ictx, node);
	node->private_data = (void *)cn;

	return (cn->conn!=NULL ? 0 : -1);
}
/*
 * initialise infiniband
 *
 * Creates the ibw context, sets up every configured node and finally
 * listens on our own address. Returns 0 on success; the result of
 * ctdb_ibw_init() if that fails, otherwise -1 on failure.
 */
static int ctdb_ibw_initialise(struct ctdb_context *ctdb)
{
	int idx;
	int rc = ctdb_ibw_init(ctdb);

	if (rc != 0) {
		return rc;
	}

	idx = 0;
	while (idx < ctdb->num_nodes) {
		if (ctdb_ibw_add_node(ctdb->nodes[idx]) != 0) {
			DEBUG(0, ("methods->add_node failed at %d\n", idx));
			return -1;
		}
		idx++;
	}

	/* listen on our own address */
	/* TODO: backlog as param */
	return (ctdb_ibw_listen(ctdb, 10) != 0) ? -1 : 0;
}
/*
 * Start infiniband
 *
 * Initiates a connection to every node whose address differs from our own.
 * Individual connect failures are not reported; always returns 0.
 */
static int ctdb_ibw_start(struct ctdb_context *ctdb)
{
	int i;

	/* everything async here */
	for (i=0;i<ctdb->num_nodes;i++) {
		struct ctdb_node *node = ctdb->nodes[i];
		if (!ctdb_same_address(&ctdb->address, &node->address)) {
			ctdb_ibw_node_connect(node);
		}
	}

	return 0;
}
/*
 * Copy <length> bytes of <data> into a send buffer obtained from the
 * wrapper and send it on <conn>.
 * Returns -1 if no send buffer is available, otherwise the result of ibw_send().
 */
static int ctdb_ibw_send_pkt(struct ibw_conn *conn, uint8_t *data, uint32_t length)
{
	void *send_buf;
	void *send_key;

	if (ibw_alloc_send_buf(conn, &send_buf, &send_key, length) != 0) {
		DEBUG(0, ("queue_pkt/ibw_alloc_send_buf failed\n"));
		return -1;
	}

	memcpy(send_buf, data, length);

	return ibw_send(conn, send_buf, send_key, length);
}
/*
 * Send the queued packets of <cn> in order, freeing each one once sent.
 * Returns 0 when the queue has been drained, -1 as soon as a send fails
 * (the unsent packets stay queued).
 */
int ctdb_flush_cn_queue(struct ctdb_ibw_node *cn)
{
	struct ctdb_ibw_msg *msg;

	for (msg = cn->queue; msg != NULL; msg = cn->queue) {
		if (ctdb_ibw_send_pkt(cn->conn, msg->data, msg->length) != 0) {
			/* will be retried later when conn is up */
			return -1;
		}

		DLIST_REMOVE(cn->queue, msg);
		cn->qcnt--;
		talloc_free(msg); /* it will talloc_free msg->data as well */
	}
	assert(cn->qcnt==0);
	/* cn->queue_last = NULL is not needed - see DLIST_ADD_AFTER */

	return 0;
}
/*
 * Send a packet to <node>, or queue a copy of it while the connection is
 * not yet in the IBWC_CONNECTED state.
 *
 * Returns 0 when the packet was queued, the result of the send when it was
 * sent directly, and -1 if the node has no connection object or the queue
 * entry cannot be allocated.
 */
static int ctdb_ibw_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length)
{
	struct ctdb_ibw_node *cn = talloc_get_type(node->private_data, struct ctdb_ibw_node);
	int rc;

	assert(length>=sizeof(uint32_t));
	assert(cn!=NULL);

	if (cn->conn==NULL) {
		DEBUG(0, ("ctdb_ibw_queue_pkt: conn is NULL\n"));
		return -1;
	}

	if (cn->conn->state==IBWC_CONNECTED) {
		rc = ctdb_ibw_send_pkt(cn->conn, data, length);
	} else {
		struct ctdb_ibw_msg *p = talloc_zero(cn, struct ctdb_ibw_msg);
		if (p == NULL) {
			DEBUG(0, ("ctdb_ibw_queue_pkt: out of memory\n"));
			return -1;
		}
		/* the copy hangs off p so freeing p releases it too */
		p->data = talloc_memdup(p, data, length);
		if (p->data == NULL) {
			DEBUG(0, ("ctdb_ibw_queue_pkt: out of memory\n"));
			talloc_free(p);
			return -1;
		}
		p->length = length;

		DLIST_ADD_AFTER(cn->queue, p, cn->queue_last);
		cn->queue_last = p;
		cn->qcnt++;

		rc = 0;
	}

	return rc;
}
/*
 * Packet allocator of the infiniband transport.
 * Currently a plain talloc allocation of <size> bytes under <mem_ctx>.
 */
static void *ctdb_ibw_allocate_pkt(TALLOC_CTX *mem_ctx, size_t size)
{
	/* TODO: use ibw_alloc_send_buf instead... */
	void *pkt = talloc_size(mem_ctx, size);

	return pkt;
}
#ifdef __NOTDEF__
/*
 * Stop the infiniband context stored in cctx->private_data.
 * Compiled out unless __NOTDEF__ is defined.
 */
static int ctdb_ibw_stop(struct ctdb_context *cctx)
{
	struct ibw_ctx *ibw = talloc_get_type(cctx->private_data, struct ibw_ctx);

	assert(ibw!=NULL);

	return ibw_stop(ibw);
}
#endif /* __NOTDEF__ */
/* Transport method table installed into ctdb->methods by ctdb_ibw_init(). */
static const struct ctdb_methods ctdb_ibw_methods = {
.initialise= ctdb_ibw_initialise,
.start = ctdb_ibw_start,
.queue_pkt = ctdb_ibw_queue_pkt,
.add_node = ctdb_ibw_add_node,
.allocate_pkt = ctdb_ibw_allocate_pkt,
/* not wired up: ctdb_ibw_stop is compiled out (#ifdef __NOTDEF__) */
// .stop = ctdb_ibw_stop
};
/*
 * initialise ibw portion of ctdb
 *
 * Creates the ibw context with the ctdb state/receive callbacks and, on
 * success, installs the infiniband method table and stores the context in
 * ctdb->private_data. Returns 0 on success, -1 if ibw_init() fails.
 */
int ctdb_ibw_init(struct ctdb_context *ctdb)
{
	struct ibw_ctx *ibw;

	DEBUG(10, ("ctdb_ibw_init invoked...\n"));

	/* TODO: no init attributes are passed yet (attr = NULL, nattr = 0) */
	ibw = ibw_init(NULL, 0, ctdb,
		       ctdb_ibw_connstate_handler,
		       ctdb_ibw_receive_handler,
		       ctdb->ev);
	if (ibw != NULL) {
		ctdb->methods = &ctdb_ibw_methods;
		ctdb->private_data = ibw;

		DEBUG(10, ("ctdb_ibw_init succeeded.\n"));
		return 0;
	}

	DEBUG(0, ("ctdb_ibw_init: ibw_init failed\n"));
	return -1;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,218 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Wrap Infiniband calls.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Server communication state (value of ibw_ctx.state) */
enum ibw_state_ctx {
IBWS_INIT = 0, /* ctx start - after ibw_init */
IBWS_READY, /* after ibw_bind & ibw_listen */
IBWS_CONNECT_REQUEST, /* after [IBWS_READY + incoming request] */
/* => [(ibw_accept)IBWS_READY | (ibw_disconnect)STOPPED | ERROR] */
IBWS_STOPPED, /* normal stop <= ibw_disconnect+(IBWS_READY | IBWS_CONNECT_REQUEST) */
IBWS_ERROR /* abnormal state; ibw_stop must be called after this */
};
/* Wrapper context: one per ibw_init call; owns the list of connections */
struct ibw_ctx {
void *ctx_userdata; /* see ibw_init */
enum ibw_state_ctx state; /* current server communication state */
void *internal; /* opaque to users of this header */
struct ibw_conn *conn_list; /* 1st elem of double linked list */
};
/* Connection state (value of ibw_conn.state) */
enum ibw_state_conn {
IBWC_INIT = 0, /* conn start - internal state */
IBWC_CONNECTED, /* after ibw_accept or ibw_connect */
IBWC_DISCONNECTED, /* after ibw_disconnect */
IBWC_ERROR
};
/* One connection, incoming (ibw_accept) or outgoing (ibw_connect) */
struct ibw_conn {
struct ibw_ctx *ctx; /* context this connection belongs to */
enum ibw_state_conn state; /* current connection state */
void *conn_userdata; /* see ibw_connect and ibw_accept */
void *internal; /* opaque to users of this header */
struct ibw_conn *prev, *next; /* links of the list starting at ctx->conn_list */
};
/*
 * (name, value) pair for the array parameter of ibw_init
 */
struct ibw_initattr {
const char *name;
const char *value;
};
/*
 * Callback type that informs the user about connection state changes.
 * It is invoked whenever a server (ctx) or client (conn) state changes.
 * Either of <conn> and <ctx> can be NULL if its state didn't change.
 * Return nonzero on error.
 */
typedef int (*ibw_connstate_fn_t)(struct ibw_ctx *ctx, struct ibw_conn *conn);
/*
 * Callback type that processes incoming packets.
 * It is invoked whenever a message arrives; <buf> holds <n> bytes.
 * Return nonzero on error.
 *
 * Important: you mustn't store the buf pointer for later use.
 * Process its contents before returning.
 */
typedef int (*ibw_receive_fn_t)(struct ibw_conn *conn, void *buf, int n);
/*
 * attr: array of <nattr> (name, value) pairs
 * where name is one of:
 *	max_send_wr [default is 256]
 *	max_recv_wr [default is 1024]
 *	<...>
 *
 * Must be called _ONCE_ for each node.
 *
 * max_msg_size is the maximum size of a message;
 * (max_send_wr + max_recv_wr) * max_msg_size bytes are allocated per connection
 *
 * ctx_userdata is stored in the returned context (ibw_ctx.ctx_userdata);
 * ibw_connstate and ibw_receive are the callbacks described above.
 *
 * returns non-NULL on success
 *
 * talloc_free must be called for the result in IBWS_STOPPED;
 * it will close resources by destructor.
 * Connections (ibw_conn *) must have been closed prior to talloc_free.
 */
struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr,
void *ctx_userdata,
ibw_connstate_fn_t ibw_connstate,
ibw_receive_fn_t ibw_receive,
struct event_context *ectx);
/*
 * Must be called in one of the states IBWS_ERROR, IBWS_READY, IBWS_CONNECT_REQUEST
 *
 * It will send out disconnect requests and free up ibw_conn structures.
 * The ctx->state will change to IBWS_STOPPED after every conn is disconnected.
 * During that time, you mustn't send/recv/disconnect any more.
 * Only after ctx->state=IBWS_STOPPED can you talloc_free the ctx.
 */
int ibw_stop(struct ibw_ctx *ctx);
/*************** connection initiation - like stream sockets *****/
/*
 * works like socket bind
 * needs a normal internet address here
 *
 * returns 0 on success
 */
int ibw_bind(struct ibw_ctx *ctx, struct sockaddr_in *my_addr);
/*
 * works like socket listen
 * non-blocking
 * enables accepting incoming connections (after IBWS_READY)
 * (it doesn't touch ctx->state by itself)
 *
 * returns 0 on success
 */
int ibw_listen(struct ibw_ctx *ctx, int backlog);
/*
 * works like socket accept
 * initializes a connection to a client
 * must be called when state=IBWS_CONNECT_REQUEST
 *
 * returns 0 on success
 *
 * You have +1 waiting here: you will get the ibw_conn structure (having the
 * same <conn_userdata> member) in ibw_connstate_fn_t.
 *
 * Important: you won't get the remote IP address (only internal conn info)
 */
int ibw_accept(struct ibw_ctx *ctx, struct ibw_conn *conn, void *conn_userdata);
/*
 * Create a new connection structure
 * available for queueing ibw_send
 *
 * <mem_ctx> is needed to be notified by talloc destruct action
 * (presumably it becomes the talloc parent of the connection - TODO confirm).
 * Callers in this tree treat a NULL result as failure.
 */
struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx, TALLOC_CTX *mem_ctx);
/*
 * Needs a normal internet address here
 * can be called within IBWS_READY|IBWS_CONNECT_REQUEST
 *
 * Returns an int status; callers in this tree treat any non-zero value
 * as failure.
 *
 * You have +1 waiting here: you will get the ibw_conn structure (having the
 * same <conn_userdata> member) in ibw_connstate_fn_t.
 */
int ibw_connect(struct ibw_conn *conn, struct sockaddr_in *serv_addr, void *conn_userdata);
/*
 * Sends out a disconnect request.
 * You should process fds after calling this function
 * and then process it with ibw_process_event normally
 * until you get conn->state = IBWC_DISCONNECTED
 *
 * You mustn't talloc_free <conn> right after this;
 * first wait for IBWC_DISCONNECTED.
 */
int ibw_disconnect(struct ibw_conn *conn);
/************ Infiniband specific event loop wrapping ******************/
/*
 * You have to fill in this buf before sending.
 * It exists just to avoid a memcpy in ibw_send.
 * Use the same (buf, key) pair with ibw_send.
 * Don't use more space than maxsize (see ibw_init).
 *
 * Returns 0 on success.
 */
int ibw_alloc_send_buf(struct ibw_conn *conn, void **buf, void **key, uint32_t len);
/*
 * Send the message in one piece.
 * Can be invoked any number of times (the message should fit into the
 * buffers) and at any time (in conn->state=IBWC_CONNECTED).
 * len must be less than or equal to max_msg_size (see ibw_init)
 *
 * You mustn't use (buf, key) any more for sending.
 */
int ibw_send(struct ibw_conn *conn, void *buf, void *key, uint32_t len);
/*
 * Call this after ibw_alloc_send_buf
 * when you won't call ibw_send for (buf, key).
 * You mustn't use (buf, key) any more afterwards.
 */
int ibw_cancel_send_buf(struct ibw_conn *conn, void *buf, void *key);
/*
 * Retrieves the last error
 * result: always non-NULL, mustn't be freed (static)
 */
const char *ibw_getLastError(void);

View File

@ -1,126 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Wrap Infiniband calls.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* Tunable options kept in struct ibw_ctx_priv (member "opts"). */
struct ibw_opts {
uint32_t max_send_wr; /* see the max_send_wr attribute of ibw_init */
uint32_t max_recv_wr; /* see the max_recv_wr attribute of ibw_init */
uint32_t recv_bufsize;
uint32_t recv_threshold;
};
/* Bookkeeping for one send work request of a connection. */
struct ibw_wr {
char *buf; /* initialized in ibw_init_memory once per connection */
int wr_id; /* position in wr_index list; also used as wr id */
char *buf_large; /* allocated specially for "large" message */
struct ibv_mr *mr_large; /* NOTE(review): presumably the memory region registered for buf_large - confirm */
int ref_cnt; /* reference count for ibw_wc_send to know when to release */
char *queued_msg; /* set at ibw_send - can be different than above */
int queued_ref_cnt; /* instead of adding the same to the queue again */
uint32_t queued_rlen; /* last wins when queued_ref_cnt>0; or simple msg size */
struct ibw_wr *next, *prev; /* in wr_list_avail or wr_list_used */
/* or extra_sent or extra_avail */
struct ibw_wr *qnext, *qprev; /* in queue */
};
/* Private per-context data (NOTE(review): presumably what ibw_ctx.internal points to - confirm). */
struct ibw_ctx_priv {
struct event_context *ectx;
struct ibw_opts opts;
struct rdma_cm_id *cm_id; /* server cm id */
struct rdma_event_channel *cm_channel;
struct fd_event *cm_channel_event;
ibw_connstate_fn_t connstate_func; /* see ibw_init */
ibw_receive_fn_t receive_func; /* see ibw_init */
long pagesize; /* sysconf result for memalign */
};
/* Buffer state for a message part (member "part" of struct ibw_conn_priv). */
struct ibw_part {
char *buf; /* talloced memory buffer */
uint32_t bufsize; /* allocated size of buf - always grows */
uint32_t len; /* message part length */
uint32_t to_read; /* 4 or *((uint32_t)buf) if len>=sizeof(uint32_t) */
};
/* Private per-connection data (NOTE(review): presumably what ibw_conn.internal points to - confirm). */
struct ibw_conn_priv {
struct ibv_comp_channel *verbs_channel;
struct fd_event *verbs_channel_event;
struct rdma_cm_id *cm_id; /* client's cm id */
struct ibv_pd *pd;
int is_accepted;
struct ibv_cq *cq; /* qp is in cm_id */
/* send side */
char *buf_send; /* max_send_wr * avg_send_size */
struct ibv_mr *mr_send;
struct ibw_wr *wr_list_avail;
struct ibw_wr *wr_list_used;
struct ibw_wr **wr_index; /* array[0..(qsize-1)] of (ibw_wr *) */
int wr_sent; /* # of send wrs in the CQ */
struct ibw_wr *extra_sent;
struct ibw_wr *extra_avail;
int extra_max; /* max wr_id in the queue */
struct ibw_wr *queue;
/* receive side */
/* buf_recv is a ring buffer */
char *buf_recv; /* max_recv_wr * avg_recv_size */
struct ibv_mr *mr_recv;
int recv_index; /* index of the next recv buffer when refilling */
struct ibw_part part;
};
/*
 * Unlink <p> from the doubly linked list headed by <list>.
 * <prev> and <next> name the link members to use, so one structure can sit
 * on several lists. The element doesn't have to be in the list.
 */
#define DLIST_REMOVE2(list, p, prev, next) \
do { \
	if ((p) != (list)) { \
		if ((p)->prev) (p)->prev->next = (p)->next; \
		if ((p)->next) (p)->next->prev = (p)->prev; \
	} else { \
		(list) = (p)->next; \
		if (list) (list)->prev = NULL; \
	} \
	if ((p) != (list)) { \
		(p)->prev = NULL; \
		(p)->next = NULL; \
	} \
} while (0)
/*
 * Append <p> to the end of the doubly linked list headed by <list>.
 * <type> is the element pointer type, needed for the temporary used to walk
 * to the tail; <prev> and <next> name the link members to use.
 */
#define DLIST_ADD_END2(list, p, type, prev, next) \
do { \
	if ((list)) { \
		type tail_ = (list); \
		while (tail_->next) { \
			tail_ = tail_->next; \
		} \
		tail_->next = (p); \
		(p)->next = NULL; \
		(p)->prev = tail_; \
	} else { \
		(list) = (p); \
		(p)->prev = NULL; \
		(p)->next = NULL; \
	} \
} while (0)

View File

@ -1,659 +0,0 @@
/*
* Unix SMB/CIFS implementation.
* Test the infiniband wrapper.
*
* Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
*
* Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <malloc.h>
#include <assert.h>
#include <unistd.h>
#include <signal.h>
#include <sys/time.h>
#include <time.h>
#include "includes.h"
#include "lib/events/events.h"
#include "ib/ibwrapper.h"
/* Global state of the infiniband wrapper test program. */
struct ibwtest_ctx {
int is_server; /* non-zero: bounce/verify messages instead of generating them */
char *id; /* my id */
struct ibw_initattr *attrs;
int nattrs;
char *opts; /* option string */
struct sockaddr_in *addrs; /* dynamic array of dest addrs */
int naddrs;
unsigned int nsec; /* delta times between messages in nanosec */
unsigned int sleep_usec; /* microsecs to sleep in the main loop to emulate overloading */
uint32_t maxsize; /* maximum variable message size */
int cnt; /* counter put into the text of timer-driven messages */
int nsent; /* incremented after every successful ibw_send */
int nmsg; /* number of messages to send (client) */
int kill_me; /* set when the main loop can exit */
int stopping; /* set once ibw_stop has been requested */
int error; /* last non-zero result of a send helper */
struct ibw_ctx *ibwctx;
struct timeval start_time, end_time;
};
/* Per-connection test userdata (passed as conn_userdata to ibw_connect/ibw_accept). */
struct ibwtest_conn {
char *id; /* peer id, copied from a received TESTOP_SEND_ID message */
};
/* Opcode byte that follows the uint32_t length at the start of every test message. */
enum testopcode {
TESTOP_SEND_ID = 1, /* payload: sender's id string */
TESTOP_SEND_TEXT = 2, /* payload: text string */
TESTOP_SEND_RND = 3 /* payload: random bytes followed by a one-byte sum */
};
/*
 * Issue a connect request to every destination address in tcx->addrs.
 * All connections share one ibwtest_conn userdata object.
 * Returns 0 when every request was issued, -1 on the first failure.
 */
int ibwtest_connect_everybody(struct ibwtest_ctx *tcx)
{
	struct ibw_conn *conn;
	struct ibwtest_conn *tconn = talloc_zero(tcx, struct ibwtest_conn);
	int i;

	if (tconn == NULL) {
		fprintf(stderr, "ibwtest_connect_everybody: out of memory\n");
		return -1;
	}

	for(i=0; i<tcx->naddrs; i++) {
		conn = ibw_conn_new(tcx->ibwctx, tconn);
		if (conn == NULL) {
			fprintf(stderr, "ibw_conn_new error at %d\n", i);
			return -1;
		}
		if (ibw_connect(conn, &tcx->addrs[i], tconn)) {
			fprintf(stderr, "ibw_connect error at %d\n", i);
			return -1;
		}
	}
	DEBUG(10, ("sent %d connect request...\n", tcx->naddrs));

	return 0;
}
/*
 * Send our id (tcx->id) to the peer as a TESTOP_SEND_ID message.
 * Message layout: uint32_t total length, opcode byte, NUL-terminated id.
 * Returns 0 on success, -1 if the buffer cannot be obtained or the send fails.
 */
int ibwtest_send_id(struct ibw_conn *conn)
{
	struct ibwtest_ctx *tcx = talloc_get_type(conn->ctx->ctx_userdata, struct ibwtest_ctx);
	char *msg;
	char *body;
	void *key;
	uint32_t total;

	DEBUG(10, ("ibwtest_send_id\n"));
	total = sizeof(uint32_t)+strlen(tcx->id)+2;
	if (ibw_alloc_send_buf(conn, (void **)&msg, &key, total) != 0) {
		DEBUG(0, ("send_id: ibw_alloc_send_buf failed\n"));
		return -1;
	}

	/* first sizeof(uint32_t) size bytes are for length */
	*((uint32_t *)msg) = total;
	body = msg + sizeof(uint32_t);
	*body++ = (char)TESTOP_SEND_ID;
	strcpy(body, tcx->id);

	if (ibw_send(conn, msg, key, total) != 0) {
		DEBUG(0, ("send_id: ibw_send error\n"));
		return -1;
	}
	tcx->nsent++;

	return 0;
}
/*
 * Send <msg> to the peer as a TESTOP_SEND_TEXT message.
 * Does nothing (and returns 0) while the connection is not yet up.
 * Returns -1 if the buffer cannot be obtained or the send fails.
 */
int ibwtest_send_test_msg(struct ibwtest_ctx *tcx, struct ibw_conn *conn, const char *msg)
{
	char *out;
	void *key;
	uint32_t total;

	if (conn->state!=IBWC_CONNECTED) {
		return 0; /* not yet up */
	}

	total = strlen(msg) + 2 + sizeof(uint32_t);
	if (ibw_alloc_send_buf(conn, (void **)&out, &key, total) != 0) {
		fprintf(stderr, "send_test_msg: ibw_alloc_send_buf failed\n");
		return -1;
	}

	/* layout: length word, opcode byte, NUL-terminated text */
	*((uint32_t *)out) = total;
	out[sizeof(uint32_t)] = (char)TESTOP_SEND_TEXT;
	strcpy(out + sizeof(uint32_t) + 1, msg);

	if (ibw_send(conn, out, key, total) != 0) {
		DEBUG(0, ("send_test_msg: ibw_send error\n"));
		return -1;
	}
	tcx->nsent++;

	return 0;
}
/*
 * Fill buf[0..size-1] with bytes drawn from rand(), last byte first.
 * Returns the sum of the bytes written, modulo 256.
 */
unsigned char ibwtest_fill_random(unsigned char *buf, uint32_t size)
{
	unsigned char checksum = 0;
	uint32_t pos;

	for (pos = size; pos > 0; pos--) {
		unsigned char byte = (unsigned char)(256.0 * (rand() / (RAND_MAX + 1.0)));

		buf[pos - 1] = byte;
		checksum += byte;
	}

	return checksum;
}
/*
 * Return the sum of buf[0..size-1], modulo 256.
 */
unsigned char ibwtest_get_sum(unsigned char *buf, uint32_t size)
{
	unsigned char total = 0;
	uint32_t pos;

	for (pos = 0; pos < size; pos++) {
		total += buf[pos];
	}

	return total;
}
/*
 * Send one TESTOP_SEND_RND message carrying <size> random bytes.
 * Message layout: uint32_t total length, opcode byte, <size> random bytes,
 * one byte holding the sum of those random bytes.
 * Returns 0 on success, -1 if the buffer cannot be obtained or the send fails.
 */
int ibwtest_do_varsize_scenario_conn_size(struct ibwtest_ctx *tcx, struct ibw_conn *conn, uint32_t size)
{
	const size_t hdr = sizeof(uint32_t) + 1; /* length word + opcode byte */
	unsigned char *msg;
	void *key;
	uint32_t total = sizeof(uint32_t) + 1 + size + 1;

	if (ibw_alloc_send_buf(conn, (void **)&msg, &key, total) != 0) {
		DEBUG(0, ("varsize/ibw_alloc_send_buf failed\n"));
		return -1;
	}

	*((uint32_t *)msg) = total;
	msg[sizeof(uint32_t)] = TESTOP_SEND_RND;
	/* the checksum byte follows the random payload */
	msg[hdr + size] = ibwtest_fill_random(msg + hdr, size);

	if (ibw_send(conn, msg, key, total) != 0) {
		DEBUG(0, ("varsize/ibw_send failed\n"));
		return -1;
	}
	tcx->nsent++;

	return 0;
}
/*
 * Send tcx->nmsg random messages whose payload size grows linearly up to
 * tcx->maxsize. Returns 0 on success, -1 as soon as one send fails.
 */
int ibwtest_do_varsize_scenario_conn(struct ibwtest_ctx *tcx, struct ibw_conn *conn)
{
	int step = 0;

	while (step < tcx->nmsg) {
		/* message number (step+1) of nmsg gets that fraction of maxsize */
		uint32_t payload = (uint32_t)((float)(tcx->maxsize) * ((float)(step+1)/(float)tcx->nmsg));

		if (ibwtest_do_varsize_scenario_conn_size(tcx, conn, payload) != 0) {
			return -1;
		}
		step++;
	}

	return 0;
}
/*int ibwtest_do_varsize_scenario(ibwtest_ctx *tcx)
{
int rc;
struct ibw_conn *conn;
for(conn=tcx->ibwctx->conn_list; conn!=NULL; conn=conn->next) {
if (conn->state==IBWC_CONNECTED) {
rc = ibwtest_do_varsize_scenario_conn(tcx, conn);
if (rc)
tcx->error = rc;
}
}
}*/
/*
 * State-change callback of the test program.
 *
 * ctx:  non-NULL when the context state changed
 * conn: non-NULL when a connection state changed
 *
 * Accepts incoming connection requests, sends our id once a connection is
 * up and flags the main loop for exit when the context has stopped.
 * Returns 0, or -1 if gettimeofday() fails.
 */
int ibwtest_connstate_handler(struct ibw_ctx *ctx, struct ibw_conn *conn)
{
	struct ibwtest_ctx *tcx = NULL; /* userdata */
	struct ibwtest_conn *tconn = NULL; /* userdata */

	if (ctx) {
		tcx = talloc_get_type(ctx->ctx_userdata, struct ibwtest_ctx);

		switch(ctx->state) {
		case IBWS_INIT:
			DEBUG(10, ("test IBWS_INIT\n"));
			break;
		case IBWS_READY:
			DEBUG(10, ("test IBWS_READY\n"));
			break;
		case IBWS_CONNECT_REQUEST:
			DEBUG(10, ("test IBWS_CONNECT_REQUEST\n"));
			tconn = talloc_zero(conn, struct ibwtest_conn);
			if (ibw_accept(ctx, conn, tconn)) {
				DEBUG(0, ("error accepting the connect request\n"));
			}
			break;
		case IBWS_STOPPED:
			DEBUG(10, ("test IBWS_STOPPED\n"));
			tcx->kill_me = 1; /* main loop can exit */
			break;
		case IBWS_ERROR:
			DEBUG(10, ("test IBWS_ERROR\n"));
			ibw_stop(tcx->ibwctx);
			break;
		default:
			assert(0);
			break;
		}
	}

	if (conn) {
		/* ctx may be NULL when only the connection changed state,
		 * so reach the test context through the connection */
		tcx = talloc_get_type(conn->ctx->ctx_userdata, struct ibwtest_ctx);

		switch(conn->state) {
		case IBWC_INIT:
			DEBUG(10, ("test IBWC_INIT\n"));
			break;
		case IBWC_CONNECTED:
			if (gettimeofday(&tcx->start_time, NULL)) {
				DEBUG(0, ("gettimeofday error %d\n", errno));
				return -1;
			}
			ibwtest_send_id(conn);
			break;
		case IBWC_DISCONNECTED:
			DEBUG(10, ("test IBWC_DISCONNECTED\n"));
			talloc_free(conn);
			break;
		case IBWC_ERROR:
			DEBUG(10, ("test IBWC_ERROR %s\n", ibw_getLastError()));
			break;
		default:
			assert(0);
			break;
		}
	}

	return 0;
}
/*
 * Receive callback of the test program.
 *
 * conn: connection the message arrived on
 * buf:  message: uint32_t length, opcode byte, payload
 * n:    message length in bytes
 *
 * Server: verifies the trailing sum of TESTOP_SEND_RND messages and bounces
 * every other message except TESTOP_SEND_ID back to the sender.
 * Client: after the peer's id arrives runs the variable-size scenario (when
 * maxsize is set) and sends the next "hello world" text message; stops the
 * context once nmsg reaches zero.
 *
 * Returns 0 on success, non-zero on error.
 */
int ibwtest_receive_handler(struct ibw_conn *conn, void *buf, int n)
{
	struct ibwtest_conn *tconn;
	enum testopcode op;
	struct ibwtest_ctx *tcx;
	int rc = 0;

	/* validate the arguments before conn is dereferenced */
	assert(conn!=NULL);
	assert(n>=0 && (size_t)n>=sizeof(uint32_t)+1);

	tcx = talloc_get_type(conn->ctx->ctx_userdata, struct ibwtest_ctx);
	tconn = talloc_get_type(conn->conn_userdata, struct ibwtest_conn);

	op = (enum testopcode)((char *)buf)[sizeof(uint32_t)];
	if (op==TESTOP_SEND_ID) {
		tconn->id = talloc_strdup(tconn, ((char *)buf)+sizeof(uint32_t)+1);
	}
	if (op==TESTOP_SEND_ID || op==TESTOP_SEND_TEXT) {
		DEBUG(11, ("[%d]msg from %s: \"%s\"(%d)\n", op,
			tconn->id ? tconn->id : "NULL", ((char *)buf)+sizeof(uint32_t)+1, n));
	}

	if (tcx->is_server) {
		if (op==TESTOP_SEND_RND) {
			unsigned char sum;
			uint32_t payload;

			/* a random message needs room for its trailing sum byte,
			 * otherwise the payload length below would wrap around */
			if ((size_t)n < sizeof(uint32_t) + 2) {
				DEBUG(0, ("ERROR: truncated varsize message (%d)\n", n));
				goto error;
			}
			payload = (uint32_t)(n - sizeof(uint32_t) - 2);

			sum = ibwtest_get_sum((unsigned char *)buf + sizeof(uint32_t) + 1,
				payload);
			DEBUG(11, ("[%d]msg varsize %u/sum %u from %s\n",
				op,
				payload,
				(uint32_t)sum,
				tconn->id ? tconn->id : "NULL"));
			if (sum!=((unsigned char *)buf)[n-1]) {
				DEBUG(0, ("ERROR: checksum mismatch %u!=%u\n",
					(uint32_t)sum, (uint32_t)((unsigned char *)buf)[n-1]));
				ibw_stop(tcx->ibwctx);
				goto error;
			}
		} else if (op!=TESTOP_SEND_ID) {
			char *buf2;
			void *key2;

			/* bounce message regardless what it is */
			if (ibw_alloc_send_buf(conn, (void **)&buf2, &key2, n)) {
				fprintf(stderr, "ibw_alloc_send_buf error #2\n");
				goto error;
			}
			memcpy(buf2, buf, n);
			if (ibw_send(conn, buf2, key2, n)) {
				fprintf(stderr, "ibw_send error #2\n");
				goto error;
			}
			tcx->nsent++;
		}
	} else { /* client: */
		if (op==TESTOP_SEND_ID && tcx->maxsize) {
			/* send them in one blow */
			rc = ibwtest_do_varsize_scenario_conn(tcx, conn);
		}

		if (tcx->nmsg) {
			char msg[26];
			snprintf(msg, sizeof(msg), "hello world %d", tcx->nmsg--);
			rc = ibwtest_send_test_msg(tcx, conn, msg);
			if (tcx->nmsg==0) {
				ibw_stop(tcx->ibwctx);
				tcx->stopping = 1;
			}
		}
	}

	if (rc)
		tcx->error = rc;

	return rc;
error:
	return -1;
}
void ibwtest_timeout_handler(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ibwtest_ctx *tcx = talloc_get_type(private_data, struct ibwtest_ctx);
int rc;
if (!tcx->is_server) {
struct ibw_conn *conn;
char msg[50];
/* fill it with something variable... */
sprintf(msg, "hello world %d", tcx->cnt++);
/* send something to everybody... */
for(conn=tcx->ibwctx->conn_list; conn!=NULL; conn=conn->next) {
if (conn->state==IBWC_CONNECTED) {
rc = ibwtest_send_test_msg(tcx, conn, msg);
if (rc)
tcx->error = rc;
}
}
} /* else allow main loop run */
}
/* context reachable from the signal handler; set by main() */
static struct ibwtest_ctx *testctx = NULL;

/*
 * SIGINT handler. The first interrupt in a stoppable state (READY,
 * CONNECT_REQUEST or ERROR) asks ibw to stop and marks the context as
 * stopping; an interrupt while already stopping, or in any other state,
 * sets kill_me so that the main loop exits.
 */
void ibwtest_sigint_handler(int sig)
{
	int stoppable;

	DEBUG(0, ("got SIGINT\n"));
	if (testctx == NULL) {
		return;
	}

	stoppable = (testctx->ibwctx->state==IBWS_READY ||
		     testctx->ibwctx->state==IBWS_CONNECT_REQUEST ||
		     testctx->ibwctx->state==IBWS_ERROR);
	if (!stoppable) {
		testctx->kill_me = 1;
		return;
	}

	if (testctx->stopping) {
		/* second interrupt while the stop is still pending */
		DEBUG(10, ("forcing exit...\n"));
		testctx->kill_me = 1;
		return;
	}

	/* mostly expected case */
	ibw_stop(testctx->ibwctx);
	testctx->stopping = 1;
}
/*
 * Split optext, a "name1:value1,name2:value2,..." list, in place: every
 * ',' and the first ':' of each pair is overwritten with '\0' and the
 * name/value members of the returned array point into optext.
 *
 * tcx    - talloc parent of the returned array
 * optext - writable option text; must outlive the returned array
 * pattrs - receives the array, or NULL on failure
 * nattrs - receives the number of entries that were actually filled in
 * op     - option letter, only used in error messages
 *
 * Returns 0 on success, -1 on a format error or allocation failure.
 */
int ibwtest_parse_attrs(struct ibwtest_ctx *tcx, char *optext,
	struct ibw_initattr **pattrs, int *nattrs, char op)
{
	int i = 0, n = 1;
	int process_next = 1;
	char *p, *q;
	struct ibw_initattr *attrs = NULL;

	*pattrs = NULL;
	*nattrs = 0;

	/* upper bound for the number of pairs: one more than the commas */
	for(p = optext; *p!='\0'; p++) {
		if (*p==',')
			n++;
	}

	attrs = (struct ibw_initattr *)talloc_size(tcx,
		n * sizeof(struct ibw_initattr));
	if (attrs == NULL) {
		fprintf(stderr, "-%c out of memory\n", op);
		return -1;
	}

	for(p = optext; *p!='\0'; p++) {
		if (process_next) {
			attrs[i].name = p;
			q = strchr(p, ':');
			if (q==NULL) {
				fprintf(stderr, "-%c format error\n", op);
				talloc_free(attrs);
				return -1;
			}
			*q = '\0';
			attrs[i].value = q + 1;
			process_next = 0;
			i++;
			p = q; /* ++ at end */
		}
		if (*p==',') {
			*p = '\0'; /* ++ at end */
			process_next = 1;
		}
	}

	*pattrs = attrs;
	/* report only the filled entries: an empty string or a trailing
	   comma yields fewer pairs than the comma count suggests */
	*nattrs = i;

	return 0;
}
/*
 * Convert address into an IPv4 address. A dotted quad is parsed
 * directly with inet_pton(); anything else is resolved as a host name.
 * Returns 0 and fills *addr on success, -1 if the name cannot be
 * resolved to an IPv4 address.
 */
static int ibwtest_get_address(const char *address, struct in_addr *addr)
{
	struct hostent *he;

	if (inet_pton(AF_INET, address, addr) > 0) {
		return 0;
	}

	he = gethostbyname(address);
	/* only accept a result that really is an IPv4 address of the
	   expected size before copying it into *addr */
	if (he == NULL || he->h_addrtype != AF_INET ||
	    he->h_length < 0 || (size_t)he->h_length != sizeof(*addr)) {
		DEBUG(0, ("invalid network address '%s'\n", address));
		return -1;
	}
	memcpy(addr, he->h_addr, he->h_length);

	return 0;
}
/*
 * Parse the current getopt argument (optarg), an "addr1:port1,addr2:port2"
 * list, into tcx->addrs / tcx->naddrs.
 *
 * tcx - test context; owns the allocated address array
 * op  - option letter, only used in error messages
 *
 * Returns 0 on success, -1 on a parse, resolve or allocation failure.
 */
int ibwtest_getdests(struct ibwtest_ctx *tcx, char op)
{
	int i;
	struct ibw_initattr *attrs = NULL;
	struct sockaddr_in *p;
	char *tmp;

	/* the parser modifies its input, so work on a private copy */
	tmp = talloc_strdup(tcx, optarg);
	if (tmp == NULL) {
		fprintf(stderr, "-%c out of memory\n", op);
		return -1;
	}

	/* hack to reuse the above ibw_initattr parser */
	if (ibwtest_parse_attrs(tcx, tmp, &attrs, &tcx->naddrs, op))
		return -1;

	tcx->addrs = talloc_size(tcx,
		tcx->naddrs * sizeof(struct sockaddr_in));
	if (tcx->addrs == NULL) {
		fprintf(stderr, "-%c out of memory\n", op);
		return -1;
	}
	/* talloc_size() does not clear memory: zero the padding of each
	   sockaddr_in before the fields are filled in */
	memset(tcx->addrs, 0, tcx->naddrs * sizeof(struct sockaddr_in));

	for(i=0; i<tcx->naddrs; i++) {
		p = tcx->addrs + i;
		p->sin_family = AF_INET;
		if (ibwtest_get_address(attrs[i].name, &p->sin_addr))
			return -1;
		p->sin_port = htons(atoi(attrs[i].value));
	}

	return 0;
}
/*
 * Server side start-up: requires exactly one configured address, binds
 * the ibw context to it and starts listening.
 * Returns 0 on success, -1 on any failure.
 */
int ibwtest_init_server(struct ibwtest_ctx *tcx)
{
	int single_addr = (tcx->naddrs == 1);

	if (!single_addr) {
		fprintf(stderr, "incorrect number of addrs(%d!=1)\n", tcx->naddrs);
		return -1;
	}

	if (ibw_bind(tcx->ibwctx, &tcx->addrs[0]) != 0) {
		DEBUG(0, ("ERROR: ibw_bind failed\n"));
		return -1;
	}

	if (ibw_listen(tcx->ibwctx, 1) != 0) {
		DEBUG(0, ("ERROR: ibw_listen failed\n"));
		return -1;
	}

	/* continued at IBWS_READY */
	return 0;
}
/*
 * Print the command line help to stdout.
 *
 * tcx  - test context; its current field values are shown as defaults
 * name - program name to show in the synopsis
 *
 * The option letters follow the getopt handling in main(): -a takes the
 * address list, -d the log level.
 * NOTE(review): main() passes tcx->nsec as the second argument of
 * timeval_current_ofs(), which looks like a microsecond offset - the
 * "nanosec" wording below may be wrong; confirm before changing it.
 */
void ibwtest_usage(struct ibwtest_ctx *tcx, char *name)
{
	printf("Usage:\n");
	printf("\t%s -i <id> -o {name:value} -a {addr:port} -t nsec -s\n", name);
	printf("\t-i <id> is a free text, acting as a server id, max 23 chars [mandatory]\n");
	printf("\t-o name1:value1,name2:value2,... is a list of (name, value) pairs\n");
	printf("\t-a addr1:port1,addr2:port2,... is a list of destination ip addresses\n");
	printf("\t-t nsec delta time between sends in nanosec [default %d]\n", tcx->nsec);
	printf("\t\t send message periodically and endless when nsec is non-zero\n");
	printf("\t-s server mode (you have to give exactly one -a address:port in this case)\n");
	printf("\t-n number of messages to send [default %d]\n", tcx->nmsg);
	printf("\t-l usec time to sleep in the main loop [default %d]\n", tcx->sleep_usec);
	printf("\t-v max variable msg size in bytes [default %d], 0=don't send var. size\n", tcx->maxsize);
	printf("\t-d LogLevel [default %d]\n", LogLevel);
	printf("Press ctrl+C to stop the program.\n");
}
/*
 * Test program entry point: parses the command line, initialises ibw,
 * then either listens (server, -s) or connects to every given address
 * (client) and runs the event loop until kill_me or an error is set.
 * A client that sent at least one message prints timing statistics.
 * Returns 0 when everything went fine, 1 otherwise.
 */
int main(int argc, char *argv[])
{
	int rc, op;
	int result = 1;
	struct event_context *ev = NULL;
	struct ibwtest_ctx *tcx = NULL;
	/* double: a float cannot hold microsecond counts beyond ~16 seconds exactly */
	double usec;

	LogLevel = 0;

	/* talloc_zero() already clears the structure */
	tcx = talloc_zero(NULL, struct ibwtest_ctx);
	if (tcx == NULL) {
		fprintf(stderr, "ERROR: out of memory\n");
		goto cleanup;
	}
	tcx->nsec = 0;
	tcx->nmsg = 1000;

	/* here is the only case we can't avoid using global... */
	testctx = tcx;
	signal(SIGINT, ibwtest_sigint_handler);
	srand((unsigned)time(NULL));

	/* note: "m:" is accepted by getopt but has no case below, so -m
	   ends up in the default branch */
	while ((op=getopt(argc, argv, "i:o:d:m:st:n:l:v:a:")) != -1) {
		switch (op) {
		case 'i':
			tcx->id = talloc_strdup(tcx, optarg);
			break;
		case 'o':
			tcx->opts = talloc_strdup(tcx, optarg);
			if (ibwtest_parse_attrs(tcx, tcx->opts, &tcx->attrs,
				&tcx->nattrs, op))
				goto cleanup;
			break;
		case 'a':
			if (ibwtest_getdests(tcx, op))
				goto cleanup;
			break;
		case 's':
			tcx->is_server = 1;
			break;
		case 't':
			tcx->nsec = (unsigned int)atoi(optarg);
			break;
		case 'n':
			tcx->nmsg = atoi(optarg);
			break;
		case 'l':
			tcx->sleep_usec = (unsigned int)atoi(optarg);
			break;
		case 'v':
			tcx->maxsize = (unsigned int)atoi(optarg);
			break;
		case 'd':
			LogLevel = atoi(optarg);
			break;
		default:
			fprintf(stderr, "ERROR: unknown option -%c\n", (char)op);
			ibwtest_usage(tcx, argv[0]);
			goto cleanup;
		}
	}

	/* the id is mandatory */
	if (tcx->id==NULL) {
		ibwtest_usage(tcx, argv[0]);
		goto cleanup;
	}

	ev = s4_event_context_init(NULL);
	assert(ev);

	tcx->ibwctx = ibw_init(tcx->attrs, tcx->nattrs,
		tcx,
		ibwtest_connstate_handler,
		ibwtest_receive_handler,
		ev
	);
	if (!tcx->ibwctx)
		goto cleanup;

	if (tcx->is_server)
		rc = ibwtest_init_server(tcx);
	else
		rc = ibwtest_connect_everybody(tcx);
	if (rc)
		goto cleanup;

	while(!tcx->kill_me && !tcx->error) {
		if (tcx->nsec) {
			event_add_timed(ev, tcx, timeval_current_ofs(0, tcx->nsec),
				ibwtest_timeout_handler, tcx);
		}

		event_loop_once(ev);

		if (tcx->sleep_usec)
			usleep(tcx->sleep_usec);
	}

	if (!tcx->is_server && tcx->nsent!=0 && !tcx->error) {
		if (gettimeofday(&tcx->end_time, NULL)) {
			DEBUG(0, ("gettimeofday error %d\n", errno));
			goto cleanup;
		}
		/* convert before multiplying so the product cannot overflow */
		usec = (double)(tcx->end_time.tv_sec - tcx->start_time.tv_sec) * 1000000.0 +
				(double)(tcx->end_time.tv_usec - tcx->start_time.tv_usec);
		printf("usec: %f, nmsg: %d, usec/nmsg: %f\n",
			usec, tcx->nsent, usec/(double)tcx->nsent);
	}

	if (!tcx->error)
		result = 0; /* everything OK */

cleanup:
	if (tcx)
		talloc_free(tcx);
	if (ev)
		talloc_free(ev);
	DEBUG(0, ("exited with code %d\n", result));
	return result;
}

View File

@ -1,7 +0,0 @@
/* popt option table with the common ctdb command line options */
extern struct poptOption popt_ctdb_cmdline[];
/*
  entry that includes popt_ctdb_cmdline in another popt option table.
  It is a complete array element including the trailing comma, so it is
  used without a separator after it.
*/
#define POPT_CTDB_CMDLINE { NULL, 0, POPT_ARG_INCLUDE_TABLE, popt_ctdb_cmdline, 0, "Common ctdb test options:", NULL },
/* NOTE(review): presumably creates a ctdb context from the options parsed
   via popt_ctdb_cmdline - confirm against the implementation */
struct ctdb_context *ctdb_cmdline_init(struct event_context *ev);

View File

@ -1,375 +0,0 @@
/*
ctdb database library
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _CTDB_H
#define _CTDB_H
/* NOTE(review): presumably a bit for ctdb_call.flags - confirm */
#define CTDB_IMMEDIATE_MIGRATION 0x00000001
/*
  description of one call, passed to ctdb_call() / ctdb_call_send() and
  handed back by ctdb_call_recv()
*/
struct ctdb_call {
/* NOTE(review): presumably the id a function was registered under with ctdb_set_call() - confirm */
int call_id;
/* key of the record the call operates on */
TDB_DATA key;
/* data passed to the call */
TDB_DATA call_data;
/* data returned by the call */
TDB_DATA reply_data;
uint32_t status;
uint32_t flags;
};
/*
structure passed to a ctdb call backend function
(the single argument of a ctdb_fn_t callback)
*/
struct ctdb_call_info {
TDB_DATA key; /* record key */
TDB_DATA record_data; /* current data in the record */
TDB_DATA *new_data; /* optionally updated record data */
TDB_DATA *call_data; /* optionally passed from caller */
TDB_DATA *reply_data; /* optionally returned by function */
uint32_t status; /* optional reply status - defaults to zero */
};
/*
error codes
NOTE(review): where these are returned is not visible in this header - confirm
*/
#define CTDB_ERR_INVALID 1
#define CTDB_ERR_NOMEM 2
/*
ctdb flags (see ctdb_set_flags())
*/
#define CTDB_FLAG_TORTURE (1<<1)
/*
a message handler ID meaning "give me all messages"
*/
#define CTDB_SRVID_ALL (~(uint64_t)0)
/*
srvid type : RECOVERY
*/
#define CTDB_SRVID_RECOVERY 0xF100000000000000LL
/*
a message handler ID meaning that the cluster has been reconfigured
*/
#define CTDB_SRVID_RECONFIGURE 0xF200000000000000LL
/*
a message handler ID meaning that an IP address has been released
*/
#define CTDB_SRVID_RELEASE_IP 0xF300000000000000LL
/*
a message ID meaning that a node's flags have changed
*/
#define CTDB_SRVID_NODE_FLAGS_CHANGED 0xF400000000000000LL
/*
a message ID meaning that a node should be banned
*/
#define CTDB_SRVID_BAN_NODE 0xF500000000000000LL
/*
a message ID meaning that a node should be unbanned
*/
#define CTDB_SRVID_UNBAN_NODE 0xF600000000000000LL
/*
special destination node numbers
*/
/* used on the domain socket, send a pdu to the local daemon */
#define CTDB_CURRENT_NODE 0xF0000001
/* send a broadcast to all nodes in the cluster, active or not */
#define CTDB_BROADCAST_ALL 0xF0000002
/* send a broadcast to all nodes in the current vnn map */
#define CTDB_BROADCAST_VNNMAP 0xF0000003
/* send a broadcast to all connected nodes */
#define CTDB_BROADCAST_CONNECTED 0xF0000004
/* only used through pointers in this header */
struct event_context;
/*
initialise ctdb subsystem
*/
struct ctdb_context *ctdb_init(struct event_context *ev);
/*
choose the transport
*/
int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport);
/*
set the directory for the local databases
*/
int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir);
/*
set some flags (CTDB_FLAG_*)
*/
void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags);
/*
set max access count before a dmaster migration
*/
void ctdb_set_max_lacount(struct ctdb_context *ctdb, unsigned count);
/*
tell ctdb what address to listen on, in transport specific format
*/
int ctdb_set_address(struct ctdb_context *ctdb, const char *address);
/*
NOTE(review): presumably the name of the unix domain socket of the
local daemon - confirm
*/
int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname);
/*
tell ctdb what nodes are available. This takes a filename, which will contain
1 node address per line, in a transport specific format
*/
int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist);
/*
start the ctdb protocol
*/
int ctdb_start(struct ctdb_context *ctdb);
int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork);
/*
attach to a ctdb database
*/
struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name);
/*
find an attached ctdb_db handle given a name
*/
struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *name);
/*
error string for last ctdb error
*/
const char *ctdb_errstr(struct ctdb_context *);
/* a ctdb call function */
typedef int (*ctdb_fn_t)(struct ctdb_call_info *);

/*
  setup a ctdb call function
*/
int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id);

/*
  make a ctdb call. The associated ctdb call function will be called on the DMASTER
  for the given record
*/
int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call);

/*
  initiate an ordered ctdb cluster shutdown
  this function will never return
*/
void ctdb_shutdown(struct ctdb_context *ctdb);

/* return vnn of this node */
uint32_t ctdb_get_vnn(struct ctdb_context *ctdb);

/*
  return the number of nodes
*/
uint32_t ctdb_get_num_nodes(struct ctdb_context *ctdb);

/* setup a handler for ctdb messages */
typedef void (*ctdb_message_fn_t)(struct ctdb_context *, uint64_t srvid,
				  TDB_DATA data, void *);
int ctdb_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
			     ctdb_message_fn_t handler,
			     void *private_data);

/*
  send/recv pair for a ctdb call (ctdb_call() itself is declared once,
  above; the second, redundant declaration that used to be here is gone)
*/
struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db, struct ctdb_call *call);
int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call);

/* send a ctdb message */
int ctdb_send_message(struct ctdb_context *ctdb, uint32_t vnn,
		      uint64_t srvid, TDB_DATA data);

/*
  Fetch a ctdb record from a remote node. Underneath this will force the
  dmaster for the record to be moved to the local node.
*/
struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
					   TDB_DATA key, TDB_DATA *data);

/* store data in the record behind a handle returned by ctdb_fetch_lock() */
int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data);

int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
	       TDB_DATA key, TDB_DATA *data);

int ctdb_register_message_handler(struct ctdb_context *ctdb,
				  TALLOC_CTX *mem_ctx,
				  uint64_t srvid,
				  ctdb_message_fn_t handler,
				  void *private_data);

/* look up an attached database by its numeric id */
struct ctdb_db_context *find_ctdb_db(struct ctdb_context *ctdb, uint32_t id);

struct ctdb_context *ctdb_cmdline_client(struct event_context *ev);
/*
control requests sent to the node given as destnode.
NOTE(review): the int results are presumably 0 on success - confirm
*/
struct ctdb_statistics;
int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status);
int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
struct ctdb_vnn_map;
int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap);
int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap);
/* table that contains a list of all dbids on a node
*/
struct ctdb_dbid_map {
/* NOTE(review): presumably the number of valid entries in dbids - confirm */
uint32_t num;
/* declared with one element; NOTE(review): presumably allocated larger when num > 1 - confirm */
uint32_t dbids[1];
};
int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap);
struct ctdb_node_map;
int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap);
/*
keys, headers and data of a database as filled in by ctdb_ctrl_pulldb()
NOTE(review): the three arrays presumably have num entries each - confirm
*/
struct ctdb_key_list {
uint32_t dbid;
uint32_t num;
TDB_DATA *keys;
struct ctdb_ltdb_header *headers;
TDB_DATA *data;
};
int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid, uint32_t lmaster, TALLOC_CTX *mem_ctx, struct ctdb_key_list *keys);
int ctdb_ctrl_copydb(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t sourcenode,
uint32_t destnode, uint32_t dbid, uint32_t lmaster,
TALLOC_CTX *mem_ctx);
int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **path);
int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx, const char **name);
int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, const char *name);
int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid);
int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode);
int ctdb_ctrl_get_config(struct ctdb_context *ctdb);
int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint32_t *level);
int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, uint32_t level);
/*
change dmaster for all keys in the database to the new value
*/
int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb,
struct timeval timeout, uint32_t destnode,
TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster);
/*
write a record on a specific db (this implicitly updates the dmaster of the
record to be the vnn of the node on which the control is executed)
*/
int ctdb_ctrl_write_record(struct ctdb_context *ctdb, uint32_t destnode, TALLOC_CTX *mem_ctx, uint32_t dbid, TDB_DATA key, TDB_DATA data);
/* recovery mode values used with ctdb_ctrl_getrecmode()/ctdb_ctrl_setrecmode() */
#define CTDB_RECOVERY_NORMAL 0
#define CTDB_RECOVERY_ACTIVE 1
/*
get the recovery mode of a remote node
*/
int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmode);
/*
set the recovery mode of a remote node
*/
int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode);
/*
get the monitoring mode of a remote node
*/
int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode);
/*
set the monitoring mode of a remote node
*/
int ctdb_ctrl_setmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t monmode);
/*
get the recovery master of a remote node
*/
int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *recmaster);
/*
set the recovery master of a remote node
*/
int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster);
/*
NOTE(review): presumably returns an array of node numbers allocated on
mem_ctx with the count stored in *num_nodes - confirm
*/
uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
struct timeval timeout,
TALLOC_CTX *mem_ctx,
uint32_t *num_nodes);
int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode);
int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile);
/* callback type taken by ctdb_traverse(); the void pointer is the private_data given there */
typedef int (*ctdb_traverse_func)(struct ctdb_context *, TDB_DATA, TDB_DATA, void *);
int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data);
int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f);
/*
get the pid of a ctdb daemon
*/
int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid);
int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
int ctdb_ctrl_getvnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode);
/*
read, write and list the named tunables of a node
*/
int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
const char *name, uint32_t *value);
int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
const char *name, uint32_t value);
int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
TALLOC_CTX *mem_ctx,
const char ***list, uint32_t *count);
/*
NOTE(review): presumably sets the flag bits in "set" and clears those in
"clear" on destnode - confirm
*/
int ctdb_ctrl_modflags(struct ctdb_context *ctdb,
struct timeval timeout,
uint32_t destnode,
uint32_t set, uint32_t clear);
int ctdb_socket_connect(struct ctdb_context *ctdb);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +0,0 @@
/*
id tree interface: maps integer ids to pointers.
NOTE(review): return conventions (e.g. negative on failure) are not
visible in this header - confirm against idtree.c
*/
/* create a new id tree as a talloc child of mem_ctx */
struct idr_context *idr_init(TALLOC_CTX *mem_ctx);
/* allocate an id for ptr; limit presumably bounds the id - confirm */
int idr_get_new(struct idr_context *idp, void *ptr, int limit);
/* like idr_get_new(), taking an additional starting_id */
int idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id, int limit);
/* like idr_get_new(); the name suggests a randomly chosen id - confirm */
int idr_get_new_random(struct idr_context *idp, void *ptr, int limit);
/* look up the pointer stored for id */
void *idr_find(struct idr_context *idp, int id);
/* remove id from the tree */
int idr_remove(struct idr_context *idp, int id);

View File

@ -1,40 +0,0 @@
#define HAVE_UNIXSOCKET 1
#include "replace.h"
#include "talloc.h"
#include "tdb.h"
#include "idtree.h"
#include "ctdb.h"
#include "lib/util/debug.h"
/* legacy boolean spellings */
typedef bool BOOL;
#define True 1
#define False 0

/* current verbosity; messages with a level above it are dropped */
extern int LogLevel;

/*
  Emit a debug message when lvl <= LogLevel. x is a parenthesised
  do_debug() argument list, e.g. DEBUG(0, ("value %d\n", v));
  The macro is a single void expression rather than a bare "if", so that
      if (cond) DEBUG(...); else ...
  keeps the "else" attached to the caller's "if".
*/
#define DEBUG(lvl, x) ((void)(((lvl) <= LogLevel) && ((do_debug x), 1)))

/* expands to nothing: marks a symbol as part of the public interface */
#define _PUBLIC_

/* clear a structure (pass the object itself, not a pointer to it) */
#define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))

/* cast away const without a compiler warning */
#ifndef discard_const
#define discard_const(ptr) ((void *)((uintptr_t)(ptr)))
#endif
/*
helper functions that live outside this header
NOTE(review): the one-line descriptions below are derived from the names
and signatures only - confirm against the implementations
*/
/* timeval helpers */
struct timeval timeval_zero(void);
bool timeval_is_zero(const struct timeval *tv);
struct timeval timeval_current(void);
struct timeval timeval_set(uint32_t secs, uint32_t usecs);
int timeval_compare(const struct timeval *tv1, const struct timeval *tv2);
struct timeval timeval_until(const struct timeval *tv1,
const struct timeval *tv2);
/* a timeval built from a seconds/microseconds offset */
_PUBLIC_ struct timeval timeval_current_ofs(uint32_t secs, uint32_t usecs);
double timeval_elapsed(struct timeval *tv);
/* file and string helpers */
char **file_lines_load(const char *fname, int *numlines, TALLOC_CTX *mem_ctx);
char *hex_encode(TALLOC_CTX *mem_ctx, const unsigned char *buff_in, size_t len);
_PUBLIC_ const char **str_list_add(const char **list, const char *s);
/* switch blocking mode of a file descriptor */
_PUBLIC_ int set_blocking(int fd, bool set);

View File

@ -1,238 +0,0 @@
#! /bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
#
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
# name transformation requested with -b= / -t= (empty: keep the name).
# The rest of the script reads $transformarg, so that is the name that
# has to be reset here; otherwise a value from the caller's environment
# would be used.
transformbasename=""
transformarg=""
# default action is to move the file with mode 0755; the options below
# replace or fill in these commands
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
# source, destination and the "-d" (create directory) flag
src=""
dst=""
dir_arg=""
# parse the command line: options set the commands prepared above, the
# first non-option word is the source; every later non-option word
# replaces the destination (so the last one wins)
while [ x"$1" != x ]; do
case $1 in
# -c: copy instead of move
-c) instcmd="$cpprog"
shift
continue;;
# -d: create a directory
-d) dir_arg=true
shift
continue;;
# -m MODE: mode passed to chmod
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
# -o OWNER: run chown
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
# -g GROUP: run chgrp
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
# -s: strip the installed file
-s) stripcmd="$stripprog"
shift
continue;;
# -t=SEDEXPR: sed expression applied to the destination file name
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
# -b=SUFFIX: suffix removed before and re-added after the transformation
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
# a source (or, with -d, a directory name) is mandatory
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
if [ x"$dir_arg" != x ]; then
# -d: the single argument is the directory to create; nothing to do
# when it already exists
dst=$src
src=""
if [ -d $dst ]; then
instcmd=:
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
# (so '/' is turned into '%' and the path is split on '%' instead)
IFS='%'
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
# walk the path components and create every directory that is missing
pathcomp=''
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
# do the real work: either create the directory (-d) or install the file
# through a temporary name in the destination directory. The commands are
# chained with && so that the first failure aborts the rest.
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

View File

@ -1,597 +0,0 @@
/*
Unix SMB/CIFS implementation.
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
this is the open files database, ctdb backend. It implements shared
storage of what files are open between server instances, and
implements the rules of shared access to files.
The caller needs to provide a file_key, which specifies what file
they are talking about. This needs to be a unique key across all
filesystems, and is usually implemented in terms of a device/inode
pair.
Before any operations can be performed the caller needs to establish
a lock on the record associated with file_key. That is done by
calling odb_lock(). The caller releases this lock by calling
talloc_free() on the returned handle.
All other operations on a record are done by passing the odb_lock()
handle back to this module. The handle contains internal
information about what file_key is being operated on.
*/
#include "includes.h"
#include "system/filesys.h"
#include "../tdb/include/tdb.h"
#include "messaging/messaging.h"
#include "tdb_wrap.h"
#include "lib/messaging/irpc.h"
#include "librpc/gen_ndr/ndr_opendb.h"
#include "ntvfs/ntvfs.h"
#include "ntvfs/common/ntvfs_common.h"
#include "cluster/cluster.h"
#include "include/ctdb.h"
#include "param/param.h"
/*
  state of the ctdb backed open files database, created by odb_ctdb_init()
*/
struct odb_context {
/* ctdb context obtained from cluster_backend_handle() */
struct ctdb_context *ctdb;
/* handle of the attached "opendb" ctdb database */
struct ctdb_db_context *ctdb_db;
/* ntvfs context this database was opened for */
struct ntvfs_context *ntvfs_ctx;
/* value of the SHARE_OPLOCKS share option at init time */
bool oplocks;
};
/*
an odb lock handle. You must obtain one of these using odb_lock() before doing
any other operations.
*/
struct odb_lock {
/* database the lock belongs to (held via talloc_reference) */
struct odb_context *odb;
/* record handle returned by ctdb_fetch_lock() */
struct ctdb_record_handle *rec;
/* private copy of the file key */
TDB_DATA key;
/* record data as returned by ctdb_fetch_lock() */
TDB_DATA data;
};
/*
  Open the ctdb backed open files database ("opendb"). Close it down
  using talloc_free(). The ntvfs_ctx is kept in the returned context; it
  provides the share configuration and the messaging context used for
  pending open notifications.

  Returns NULL if the cluster backend is not ctdb, on allocation failure
  or if the database cannot be attached.
*/
static struct odb_context *odb_ctdb_init(TALLOC_CTX *mem_ctx,
					 struct ntvfs_context *ntvfs_ctx)
{
	struct odb_context *odb;
	struct ctdb_context *ctdb = talloc_get_type(cluster_backend_handle(),
						    struct ctdb_context);

	/* talloc_get_type() yields NULL when the backend handle is not a
	   ctdb context; do not hand that to ctdb_attach() */
	if (ctdb == NULL) {
		DEBUG(0,("cluster backend handle is not a ctdb context\n"));
		return NULL;
	}

	odb = talloc(mem_ctx, struct odb_context);
	if (odb == NULL) {
		return NULL;
	}

	odb->ctdb = ctdb;
	odb->ctdb_db = ctdb_attach(ctdb, "opendb");
	if (!odb->ctdb_db) {
		DEBUG(0,("Failed to get attached ctdb db handle for opendb\n"));
		talloc_free(odb);
		return NULL;
	}

	odb->ntvfs_ctx = ntvfs_ctx;

	/* oplock support follows the share configuration */
	odb->oplocks = share_bool_option(ntvfs_ctx->config, SHARE_OPLOCKS, SHARE_OPLOCKS_DEFAULT);

	return odb;
}
/*
  Lock the odb entry for file_key and return a handle for it. The handle
  keeps its own copy of the key and a reference on odb; the caller
  unlocks by calling talloc_free() on the handle. Returns NULL when an
  allocation or the ctdb fetch-lock fails.
*/
static struct odb_lock *odb_ctdb_lock(TALLOC_CTX *mem_ctx,
				      struct odb_context *odb, DATA_BLOB *file_key)
{
	struct odb_lock *handle = talloc(mem_ctx, struct odb_lock);

	if (handle == NULL) {
		return NULL;
	}

	handle->odb = talloc_reference(handle, odb);
	handle->key.dsize = file_key->length;
	handle->key.dptr = talloc_memdup(handle, file_key->data, file_key->length);

	if (handle->key.dptr != NULL) {
		handle->rec = ctdb_fetch_lock(odb->ctdb_db, (TALLOC_CTX *)handle,
					      handle->key, &handle->data);
		if (handle->rec) {
			return handle;
		}
	}

	/* either the key copy or the fetch-lock failed */
	talloc_free(handle);
	return NULL;
}
/*
  stub: always returns an empty blob; mem_ctx and lck are not used
*/
static DATA_BLOB odb_ctdb_get_key(TALLOC_CTX *mem_ctx, struct odb_lock *lck)
{
/*
* as this file will go away and isn't used yet, this is not
* implemented; copy the implementation from the tdb backend
* if it is ever needed
* --metze
*/
return data_blob_const(NULL, 0);
}
/*
  Decide whether two opens of the same file conflict.
  Returns NT_STATUS_OK when they can coexist and
  NT_STATUS_SHARING_VIOLATION when the access one of them holds is not
  allowed by the share mode of the other.
*/
static NTSTATUS share_conflict(struct opendb_entry *e1, struct opendb_entry *e2)
{
	/* an open without any of these access rights can't conflict */
	const uint32_t io_rights = SEC_FILE_WRITE_DATA |
				   SEC_FILE_APPEND_DATA |
				   SEC_FILE_READ_DATA |
				   SEC_FILE_EXECUTE |
				   SEC_STD_DELETE;

	if ((e1->access_mask & io_rights) == 0 ||
	    (e2->access_mask & io_rights) == 0) {
		return NT_STATUS_OK;
	}

	/* opens on different streams don't interact */
	if (e1->stream_id != e2->stream_id) {
		return NT_STATUS_OK;
	}

#define CHECK_MASK(am, right, sa, share) \
if (((am) & (right)) && !((sa) & (share))) return NT_STATUS_SHARING_VIOLATION

	/* what e1 does must be permitted by the share mode of e2 ... */
	CHECK_MASK(e1->access_mask, SEC_FILE_WRITE_DATA | SEC_FILE_APPEND_DATA,
		   e2->share_access, NTCREATEX_SHARE_ACCESS_WRITE);
	CHECK_MASK(e1->access_mask, SEC_FILE_READ_DATA | SEC_FILE_EXECUTE,
		   e2->share_access, NTCREATEX_SHARE_ACCESS_READ);
	CHECK_MASK(e1->access_mask, SEC_STD_DELETE,
		   e2->share_access, NTCREATEX_SHARE_ACCESS_DELETE);

	/* ... and the other way round */
	CHECK_MASK(e2->access_mask, SEC_FILE_WRITE_DATA | SEC_FILE_APPEND_DATA,
		   e1->share_access, NTCREATEX_SHARE_ACCESS_WRITE);
	CHECK_MASK(e2->access_mask, SEC_FILE_READ_DATA | SEC_FILE_EXECUTE,
		   e1->share_access, NTCREATEX_SHARE_ACCESS_READ);
	CHECK_MASK(e2->access_mask, SEC_STD_DELETE,
		   e1->share_access, NTCREATEX_SHARE_ACCESS_DELETE);

	return NT_STATUS_OK;
}
/*
pull a record, translating from the db format to the opendb_file structure defined
in opendb.idl
*/
static NTSTATUS odb_pull_record(struct odb_lock *lck, struct opendb_file *file)
{
TDB_DATA dbuf;
DATA_BLOB blob;
enum ndr_err_code ndr_err;
dbuf = lck->data;
if (dbuf.dsize == 0) {
/* empty record in ctdb means the record isn't there */
return NT_STATUS_OBJECT_NAME_NOT_FOUND;
}
blob.data = dbuf.dptr;
blob.length = dbuf.dsize;
ndr_err = ndr_pull_struct_blob(&blob, lck, lp_iconv_convenience(lck->odb->ntvfs_ctx->lp_ctx), file, (ndr_pull_flags_fn_t)ndr_pull_opendb_file);
if (!NDR_ERR_CODE_IS_SUCCESS(ndr_err)) {
return ndr_map_error2ntstatus(ndr_err);
}
return NT_STATUS_OK;
}
/*
  Encode *file (the opendb_file structure defined in opendb.idl) and
  store it in the locked record; a file without entries is stored as an
  empty record. The record handle is released in both cases.

  Returns NT_STATUS_OK on success, the mapped NDR error when encoding
  fails and NT_STATUS_INTERNAL_DB_CORRUPTION when the store fails.

  NOTE(review): lck->rec is left pointing at the freed handle, so the
  lock must not be used for a second push - confirm that no caller does.
*/
static NTSTATUS odb_push_record(struct odb_lock *lck, struct opendb_file *file)
{
	TDB_DATA dbuf;
	DATA_BLOB blob;
	enum ndr_err_code ndr_err;
	int ret;

	if (!file->num_entries) {
		dbuf.dptr  = NULL;
		dbuf.dsize = 0;
		/* a failed store is reported here as well, exactly like in
		   the non-empty case below */
		ret = ctdb_record_store(lck->rec, dbuf);
		talloc_free(lck->rec);
		if (ret != 0) {
			return NT_STATUS_INTERNAL_DB_CORRUPTION;
		}
		return NT_STATUS_OK;
	}

	ndr_err = ndr_push_struct_blob(&blob, lck,
				       lp_iconv_convenience(lck->odb->ntvfs_ctx->lp_ctx),
				       file, (ndr_push_flags_fn_t)ndr_push_opendb_file);
	if (!NDR_ERR_CODE_IS_SUCCESS(ndr_err)) {
		return ndr_map_error2ntstatus(ndr_err);
	}

	dbuf.dptr = blob.data;
	dbuf.dsize = blob.length;

	ret = ctdb_record_store(lck->rec, dbuf);
	talloc_free(lck->rec);
	data_blob_free(&blob);
	if (ret != 0) {
		return NT_STATUS_INTERNAL_DB_CORRUPTION;
	}

	return NT_STATUS_OK;
}
/* disabled code: not compiled, kept for reference only */
#if 0
/*
send an oplock break to a client
*/
static NTSTATUS odb_oplock_break_send(struct odb_context *odb, struct opendb_entry *e)
{
/* tell the server handling this open file about the need to send the client
a break */
return messaging_send_ptr(odb->ntvfs_ctx->msg_ctx, e->server,
MSG_NTVFS_OPLOCK_BREAK, e->file_handle);
}
#endif
/*
register an open file in the open files database. This implements the share_access
rules
Note that the path is only used by the delete on close logic, not
for comparing with other filenames

In this backend the function is a stub that always returns
NT_STATUS_FOOBAR and uses none of its arguments.
*/
static NTSTATUS odb_ctdb_open_file(struct odb_lock *lck,
void *file_handle, const char *path,
int *fd, NTTIME open_write_time,
bool allow_level_II_oplock,
uint32_t oplock_level, uint32_t *oplock_granted)
{
/*
* as this file will go away and isn't used yet, this is not
* implemented; copy the implementation from the tdb backend
* if it is ever needed
* --metze
*/
return NT_STATUS_FOOBAR;
}
/*
register a pending open file in the open files database
*/
static NTSTATUS odb_ctdb_open_file_pending(struct odb_lock *lck, void *private)
{
struct odb_context *odb = lck->odb;
struct opendb_file file;
NTSTATUS status;
status = odb_pull_record(lck, &file);
NT_STATUS_NOT_OK_RETURN(status);
file.pending = talloc_realloc(lck, file.pending, struct opendb_pending,
file.num_pending+1);
NT_STATUS_HAVE_NO_MEMORY(file.pending);
file.pending[file.num_pending].server = odb->ntvfs_ctx->server_id;
file.pending[file.num_pending].notify_ptr = private;
file.num_pending++;
return odb_push_record(lck, &file);
}
/*
  remove a opendb entry

  The entry matching file_handle and this server's id is dropped from
  the record, every pending opener is sent MSG_PVFS_RETRY_OPEN and the
  record is written back. When the last entry goes away and delete on
  close is set, a copy of the path is returned through _delete_path
  (if the caller passed one); otherwise NULL is returned there.
*/
static NTSTATUS odb_ctdb_close_file(struct odb_lock *lck, void *file_handle,
				    const char **_delete_path)
{
	struct odb_context *odb = lck->odb;
	struct opendb_file file;
	const char *delete_path = NULL;
	int i, j;
	NTSTATUS status;

	status = odb_pull_record(lck, &file);
	NT_STATUS_NOT_OK_RETURN(status);

	/* locate the entry that belongs to this handle on this server */
	for (i=0; i<file.num_entries; i++) {
		if (file.entries[i].file_handle == file_handle &&
		    cluster_id_equal(&odb->ntvfs_ctx->server_id, &file.entries[i].server)) {
			break;
		}
	}
	if (i == file.num_entries) {
		return NT_STATUS_UNSUCCESSFUL;
	}

	/* a delete on close flag on the entry is carried over to the file */
	if (file.entries[i].delete_on_close) {
		file.delete_on_close = true;
	}

	/* close the gap unless the entry was the last one in the array */
	if (i < file.num_entries-1) {
		memmove(&file.entries[i], &file.entries[i+1],
			(file.num_entries - (i+1)) *
			sizeof(struct opendb_entry));
	}

	/* send any pending notifications, removing them once sent */
	for (j=0; j<file.num_pending; j++) {
		messaging_send_ptr(odb->ntvfs_ctx->msg_ctx, file.pending[j].server,
				   MSG_PVFS_RETRY_OPEN,
				   file.pending[j].notify_ptr);
	}
	file.num_pending = 0;

	file.num_entries--;

	if (file.num_entries == 0 && file.delete_on_close) {
		delete_path = talloc_strdup(lck, file.path);
		NT_STATUS_HAVE_NO_MEMORY(delete_path);
	}

	if (_delete_path != NULL) {
		*_delete_path = delete_path;
	}

	return odb_push_record(lck, &file);
}
/*
update the oplock level of the client

NOTE: unimplemented stub in this ctdb backend. Both arguments besides
being declared are unused and the function always returns NT_STATUS_FOOBAR.
*/
static NTSTATUS odb_ctdb_update_oplock(struct odb_lock *lck, void *file_handle,
uint32_t oplock_level)
{
/*
* as this file will go away and isn't used yet,
* copy the implementation from the tdb backend
* --metze
*/
return NT_STATUS_FOOBAR;
}
/*
break oplocks entry point of the ctdb backend

NOTE: unimplemented stub. lck is unused and the function always
returns NT_STATUS_FOOBAR.
*/
static NTSTATUS odb_ctdb_break_oplocks(struct odb_lock *lck)
{
/*
* as this file will go away and isn't used yet,
* copy the implementation from the tdb backend
* --metze
*/
return NT_STATUS_FOOBAR;
}
/*
  remove a pending opendb entry

  The pending entry with the given notify pointer that was registered
  by this server is removed and the record is written back.
  NT_STATUS_UNSUCCESSFUL is returned when no such entry exists.
*/
static NTSTATUS odb_ctdb_remove_pending(struct odb_lock *lck, void *private)
{
	struct odb_context *odb = lck->odb;
	struct opendb_file file;
	NTSTATUS status;
	int idx;

	status = odb_pull_record(lck, &file);
	NT_STATUS_NOT_OK_RETURN(status);

	/* look for our pending entry */
	for (idx=0; idx<file.num_pending; idx++) {
		if (file.pending[idx].notify_ptr == private &&
		    cluster_id_equal(&odb->ntvfs_ctx->server_id, &file.pending[idx].server)) {
			break;
		}
	}
	if (idx == file.num_pending) {
		return NT_STATUS_UNSUCCESSFUL;
	}

	/* shift the tail down over the removed element, if there is a tail */
	if (idx < file.num_pending-1) {
		memmove(&file.pending[idx], &file.pending[idx+1],
			(file.num_pending - (idx+1)) *
			sizeof(struct opendb_pending));
	}
	file.num_pending--;

	return odb_push_record(lck, &file);
}
/*
  change the path stored in the record of an open file

  A missing record is not an error: there is nothing to rename then and
  NT_STATUS_OK is returned.
*/
static NTSTATUS odb_ctdb_rename(struct odb_lock *lck, const char *path)
{
	struct opendb_file file;
	NTSTATUS status = odb_pull_record(lck, &file);

	if (!NT_STATUS_IS_OK(status)) {
		if (NT_STATUS_EQUAL(NT_STATUS_OBJECT_NAME_NOT_FOUND, status)) {
			/* not having the record at all is OK */
			return NT_STATUS_OK;
		}
		return status;
	}

	file.path = path;
	return odb_push_record(lck, &file);
}
/*
  fetch the path stored in the record of an open file

  *path is set to NULL first and only filled in when the record could
  be pulled; any pull error (including a missing record) is returned
  to the caller.
*/
static NTSTATUS odb_ctdb_get_path(struct odb_lock *lck, const char **path)
{
	struct opendb_file file;
	NTSTATUS status;

	*path = NULL;

	status = odb_pull_record(lck, &file);
	/* we don't ignore NT_STATUS_OBJECT_NAME_NOT_FOUND here */
	if (!NT_STATUS_IS_OK(status)) {
		return status;
	}

	*path = file.path;
	return NT_STATUS_OK;
}
/*
  set or clear the delete on close flag in the record of an open file
*/
static NTSTATUS odb_ctdb_set_delete_on_close(struct odb_lock *lck, bool del_on_close)
{
	struct opendb_file file;
	NTSTATUS status = odb_pull_record(lck, &file);

	if (!NT_STATUS_IS_OK(status)) {
		return status;
	}

	/* flip the flag and write the record back */
	file.delete_on_close = del_on_close;
	return odb_push_record(lck, &file);
}
/*
set write time entry point of the ctdb backend

NOTE: unimplemented stub. The arguments are unused and the function
always returns NT_STATUS_FOOBAR.
*/
static NTSTATUS odb_ctdb_set_write_time(struct odb_lock *lck,
NTTIME write_time, bool force)
{
/*
* as this file will go away and isn't used yet,
* copy the implementation from the tdb backend
* --metze
*/
return NT_STATUS_FOOBAR;
}
/*
return the current value of the delete_on_close bit, and how many
people still have the file open

NOTE: unimplemented stub in this ctdb backend. Neither output parameter
is written and the function always returns NT_STATUS_FOOBAR.
*/
static NTSTATUS odb_ctdb_get_file_infos(struct odb_context *odb, DATA_BLOB *key,
bool *del_on_close, NTTIME *write_time)
{
/*
* as this file will go away and isn't used yet,
* copy the implementation from the tdb backend
* --metze
*/
return NT_STATUS_FOOBAR;
}
/*
determine if a file can be opened with the given share_access,
create_options and access_mask
*/
static NTSTATUS odb_ctdb_can_open(struct odb_lock *lck,
uint32_t stream_id, uint32_t share_access,
uint32_t access_mask, bool delete_on_close,
uint32_t open_disposition, bool break_to_none)
{
struct odb_context *odb = lck->odb;
NTSTATUS status;
struct opendb_file file;
struct opendb_entry e;
int i;
status = odb_pull_record(lck, &file);
if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) {
return NT_STATUS_OK;
}
NT_STATUS_NOT_OK_RETURN(status);
if (delete_on_close &&
file.num_entries != 0) {
return NT_STATUS_SHARING_VIOLATION;
}
if (file.delete_on_close) {
return NT_STATUS_DELETE_PENDING;
}
e.server = odb->ntvfs_ctx->server_id;
e.file_handle = NULL;
e.stream_id = 0;
e.share_access = share_access;
e.access_mask = access_mask;
for (i=0;i<file.num_entries;i++) {
status = share_conflict(&file.entries[i], &e);
if (!NT_STATUS_IS_OK(status)) {
/* note that we discard the error code
here. We do this as unless we are actually
doing an open (which comes via a different
function), we need to return a sharing
violation */
return NT_STATUS_SHARING_VIOLATION;
}
}
return NT_STATUS_OK;
}
/*
  operations table of the ctdb opendb backend, handed to odb_set_ops()
  by odb_ctdb_init_ops()
*/
static const struct opendb_ops opendb_ctdb_ops = {
	/* setup, locking and key access */
	.odb_init                = odb_ctdb_init,
	.odb_lock                = odb_ctdb_lock,
	.odb_get_key             = odb_ctdb_get_key,

	/* open / close and pending opens */
	.odb_can_open            = odb_ctdb_can_open,
	.odb_open_file           = odb_ctdb_open_file,
	.odb_open_file_pending   = odb_ctdb_open_file_pending,
	.odb_remove_pending      = odb_ctdb_remove_pending,
	.odb_close_file          = odb_ctdb_close_file,

	/* per file attributes */
	.odb_rename              = odb_ctdb_rename,
	.odb_get_path            = odb_ctdb_get_path,
	.odb_set_delete_on_close = odb_ctdb_set_delete_on_close,
	.odb_set_write_time      = odb_ctdb_set_write_time,
	.odb_get_file_infos      = odb_ctdb_get_file_infos,

	/* oplocks */
	.odb_update_oplock       = odb_ctdb_update_oplock,
	.odb_break_oplocks       = odb_ctdb_break_oplocks
};
/*
select the ctdb backend: registers opendb_ctdb_ops through odb_set_ops()
*/
void odb_ctdb_init_ops(void)
{
odb_set_ops(&opendb_ctdb_ops);
}

View File

@ -1,117 +0,0 @@
# RPM spec for the ctdb package (RHEL packaging).
# initdir is the directory the init script gets installed into.
%define initdir %{_sysconfdir}/rc.d/init.d
Summary: Clustered TDB
Vendor: Samba Team
Packager: Samba Team <samba@samba.org>
Name: ctdb
Version: 1.0
Release: 1
Epoch: 0
License: GNU GPL version 2
Group: System Environment/Daemons
URL: bzr://www.samba.org/~tridge/ctdb/
# main source tarball
Source: ctdb-%{version}.tar.bz2
# tarball with the init script and sysconfig file (the setup directory
# that is installed from in the install section below)
Source999: ctdb-setup.tar.bz2
Prereq: /sbin/chkconfig /bin/mktemp /usr/bin/killall
Prereq: fileutils sed /etc/init.d
Requires: initscripts >= 5.54-1
Provides: ctdb = %{version}
Prefix: /usr
BuildRoot: %{_tmppath}/%{name}-%{version}-root
%description
ctdb is the clustered database used by samba
#######################################################################
%prep
%setup -q
# setup the init script and sysconfig file
%setup -T -D -a 999 -n ctdb-%{version} -q
%build
# configure and build; the configure script is regenerated every time
CC="gcc"
## always run autogen.sh
./autogen.sh
CFLAGS="$RPM_OPT_FLAGS $EXTRA -D_GNU_SOURCE" ./configure \
--prefix=%{_prefix} \
--sysconfdir=%{_sysconfdir} \
--localstatedir="/var"
make showflags
make
%install
# Clean up in case there is trash left from a previous build
rm -rf $RPM_BUILD_ROOT
# Create the target build directory hierarchy
# NOTE(review): the include directory is created twice (here and on the
# next line); harmless because of mkdir -p
mkdir -p $RPM_BUILD_ROOT%{_includedir}
mkdir -p $RPM_BUILD_ROOT{%{_libdir},%{_includedir}}
mkdir -p $RPM_BUILD_ROOT%{_prefix}/{bin,sbin}
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/ctdb
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/rc.d/init.d
make DESTDIR=$RPM_BUILD_ROOT install
# files that "make install" is not relied on to put in place
install -m644 setup/ctdb.sysconfig $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ctdb
install -m755 setup/ctdb.init $RPM_BUILD_ROOT%{initdir}/ctdb
install -m755 tools/events $RPM_BUILD_ROOT%{_sysconfdir}/ctdb/events
install -m755 tools/onnode.ssh $RPM_BUILD_ROOT%{_bindir}
install -m755 tools/onnode.rsh $RPM_BUILD_ROOT%{_bindir}
# onnode is a symlink to the ssh variant
ln -sf %{_bindir}/onnode.ssh $RPM_BUILD_ROOT%{_bindir}/onnode
# unfortunately samba3 needs ctdb_private.h too
install -m644 include/ctdb_private.h $RPM_BUILD_ROOT%{_includedir}/ctdb_private.h
# Remove "*.old" files
find $RPM_BUILD_ROOT -name "*.old" -exec rm -f {} \;
%clean
rm -rf $RPM_BUILD_ROOT
%post
# register the init script with chkconfig
/sbin/chkconfig --add ctdb
%preun
# only when the scriptlet argument is 0 (presumably final removal rather
# than upgrade, per the usual RPM scriptlet convention): unregister the
# init script and stop the service
# NOTE(review): the service is stopped after it has been unregistered
if [ $1 = 0 ] ; then
/sbin/chkconfig --del ctdb
/sbin/service ctdb stop >/dev/null 2>&1
fi
exit 0
%postun
# when the scriptlet argument is at least 1, restart the service through
# its init script, discarding all output
if [ "$1" -ge "1" ]; then
%{initdir}/ctdb restart >/dev/null 2>&1
fi
#######################################################################
## Files section ##
#######################################################################
%files
%defattr(-,root,root)
%config(noreplace) %{_sysconfdir}/sysconfig/ctdb
%attr(755,root,root) %config %{initdir}/ctdb
%{_sysconfdir}/ctdb/events
%{_sbindir}/ctdbd
%{_bindir}/ctdb
%{_bindir}/onnode.ssh
%{_bindir}/onnode.rsh
%{_bindir}/onnode
%{_includedir}/ctdb.h
%{_includedir}/ctdb_private.h

View File

@ -1,90 +0,0 @@
#!/bin/sh
# Copyright (C) John H Terpstra 1998-2002
# Gerald (Jerry) Carter 2003
# Jim McDonough 2007
# Andrew Tridgell 2007
# The following allows environment variables to override the target directories
# the alternative is to have a file in your home directory called .rpmmacros
# containing the following:
# %_topdir /home/mylogin/redhat
#
# Note: Under this directory rpm expects to find the same directories that are under the
# /usr/src/redhat directory
#
# Build source and binary RPMs of ctdb from a source checkout.
# Usage: run from the top of the ctdb tree; $1 (optional) is passed through
# to rpmbuild as extra options.
EXTRA_OPTIONS="$1"

RHEL="packaging/RHEL"

[ -d "${RHEL}" ] || {
	echo "Must run this from the ctdb directory"
	exit 1
}

SPECDIR=`rpm --eval %_specdir`
SRCDIR=`rpm --eval %_sourcedir`

# At this point the SPECDIR and SRCDIR variables must have a value!

USERID=`id -u`
GRPID=`id -g`
VERSION='1.0'
REVISION=''
SPECFILE="ctdb.spec"
RPMVER=`rpm --version | awk '{print $3}'`
RPMBUILD="rpmbuild"

##
## Check the RPM version (paranoid)
##
case $RPMVER in
    4*)
	echo "Supported RPM version [$RPMVER]"
	;;
    *)
	echo "Unknown RPM version: `rpm --version`"
	exit 1
	;;
esac

if [ -f Makefile ]; then
	make distclean
fi

# Remember the source directory so we can return to it after creating the
# tarball from the parent directory. pushd/popd are not used because they
# are bash builtins and this script runs under /bin/sh.
TOPDIR=$PWD
BASEDIR=`basename "$PWD"`
cd ..
chown -R ${USERID}:${GRPID} "$BASEDIR"
if [ ! -d ctdb-${VERSION} ]; then
	# the tarball must unpack into ctdb-VERSION; use a temporary symlink
	ln -s "$BASEDIR" ctdb-${VERSION} || exit 1
	REMOVE_LN=$PWD/ctdb-$VERSION
fi
echo -n "Creating ctdb-${VERSION}.tar.bz2 ... "
tar --exclude=.bzr --exclude .bzrignore --exclude packaging --exclude="*~" -cf - ctdb-${VERSION}/. | bzip2 > ${SRCDIR}/ctdb-${VERSION}.tar.bz2
# The status must be tested directly after the pipeline: testing it after
# an echo would only ever see the echo's own (successful) status.
# Note that $? is the status of the last command of the pipeline (bzip2).
if [ $? -ne 0 ]; then
	echo "Build failed!"
	[ -n "${REMOVE_LN}" ] && rm "$REMOVE_LN"
	exit 1
fi
echo "Done."

cd "$TOPDIR" || exit 1

##
## copy additional source files
##
(cd packaging/RHEL && tar --exclude=.bzr --exclude="*~" -jcvf - setup) > ${SRCDIR}/ctdb-setup.tar.bz2
cp -p ${RHEL}/${SPECFILE} ${SPECDIR}

##
## Build
##
echo "$(basename $0): Getting Ready to build release package"
cd ${SPECDIR}
${RPMBUILD} -ba --clean --rmsource $EXTRA_OPTIONS $SPECFILE

echo "$(basename $0): Done."

# Remove the temporary symlink if we created one. An if statement is used
# so that the script does not end with a failing test (and a non-zero exit
# status) when there is no link to remove.
if [ -n "${REMOVE_LN}" ]; then
	rm "$REMOVE_LN"
fi

View File

@ -1,104 +0,0 @@
#!/bin/sh
#
# chkconfig: - 90 36
# description: Starts and stops the clustered tdb daemon
#
# pidfile: /var/run/ctdbd/ctdbd.pid
# Source function library.
if [ -f /etc/init.d/functions ] ; then
	. /etc/init.d/functions
elif [ -f /etc/rc.d/init.d/functions ] ; then
	. /etc/rc.d/init.d/functions
else
	# without the function library there is nothing we can do
	exit 0
fi

# Avoid using root's TMPDIR
unset TMPDIR

# Source networking configuration.
. /etc/sysconfig/network

CTDB_OPTIONS=""

# pull in admin specified config
if [ -f /etc/sysconfig/ctdb ]; then
	. /etc/sysconfig/ctdb
fi

# Check that networking is up.
# NETWORKING is quoted so that an unset or empty value does not make the
# test fail with a syntax error.
[ "${NETWORKING}" = "no" ] && exit 0

# build up CTDB_OPTIONS variable
# each setting below is only turned into a ctdbd option when it is non-empty
[ -z "$LOGFILE" ] || CTDB_OPTIONS="$CTDB_OPTIONS --logfile=$LOGFILE"
[ -z "$NODES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --nlist=$NODES"
[ -z "$CTDB_SOCKET" ] || CTDB_OPTIONS="$CTDB_OPTIONS --socket=$CTDB_SOCKET"
[ -z "$PUBLIC_ADDRESSES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-addresses=$PUBLIC_ADDRESSES"
[ -z "$PUBLIC_INTERFACE" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-interface=$PUBLIC_INTERFACE"
[ -z "$DBDIR" ] || CTDB_OPTIONS="$CTDB_OPTIONS --dbdir=$DBDIR"
[ -z "$EVENT_SCRIPT" ] || CTDB_OPTIONS="$CTDB_OPTIONS --event-script $EVENT_SCRIPT"
[ -z "$TRANSPORT" ] || CTDB_OPTIONS="$CTDB_OPTIONS --transport $TRANSPORT"
[ -z "$DEBUGLEVEL" ] || CTDB_OPTIONS="$CTDB_OPTIONS -d $DEBUGLEVEL"
# Start ctdbd with the assembled options. On success the subsys lock file
# is created; if the daemon fails to start or the lock file cannot be
# created the function returns 1.
start() {
	echo -n $"Starting ctdbd services: "
	daemon ctdbd $CTDB_OPTIONS
	RETVAL=$?
	echo
	if [ $RETVAL -ne 0 ] || ! touch /var/lock/subsys/ctdb; then
		RETVAL=1
	fi
	return $RETVAL
}
# Ask the daemon to shut down via "ctdb shutdown". The subsys lock file is
# removed only when that command succeeded; its status is returned.
stop() {
	echo -n $"Shutting down ctdbd services: "
	ctdb shutdown
	RETVAL=$?
	echo
	if [ $RETVAL -eq 0 ]; then
		rm -f /var/lock/subsys/ctdb
	fi
	echo ""
	return $RETVAL
}
# Restart the daemon: run stop followed by start. start is run regardless
# of whether stop succeeded, and its status is the status of restart.
restart() {
stop
start
}
# Report the daemon status using "ctdb status"; any failure of that
# command is reported as return value 1.
rhstatus() {
	ctdb status || return 1
}
# Allow status as non-root.
# (handled up front, before the general dispatch below)
case "$1" in
  status)
	rhstatus
	exit $?
	;;
esac

# Dispatch on the requested action; the script exits with the status of
# the action that was run.
case "$1" in
  start)
	start
	;;
  stop)
	stop
	;;
  restart)
	restart
	;;
  status)
	rhstatus
	;;
  condrestart)
	# only restart when the lock file shows the service as running
	[ -f /var/lock/subsys/ctdb ] && restart || :
	;;
  *)
	echo $"Usage: $0 {start|stop|restart|status|condrestart}"
	exit 1
esac

exit $?

View File

@ -1,52 +0,0 @@
# Options to ctdbd. This is read by /etc/init.d/ctdb
# the NODES file must be specified or ctdb won't start
# it should contain a list of IPs that ctdb will use
# it must be exactly the same on all cluster nodes
# defaults to /etc/ctdb/nodes
# NODES=/etc/ctdb/nodes
# the directory to put the local ctdb database files in
# defaults to /var/ctdb
# DBDIR=/var/ctdb
# the script to run when ctdb needs to ask the OS for help,
# such as when an IP address needs to be taken or released
# defaults to /etc/ctdb/events
# EVENT_SCRIPT=/etc/ctdb/events
# the location of the local ctdb socket
# defaults to /tmp/ctdb.socket
# CTDB_SOCKET=/tmp/ctdb.socket
# what transport to use. Only tcp is currently supported
# defaults to tcp
# TRANSPORT="tcp"
# should ctdb do IP takeover? If it should, then specify a file
# containing the list of public IP addresses that ctdb will manage
# Note that these IPs must be different from those in $NODES above
# there is no default
# PUBLIC_ADDRESSES=/etc/ctdb/public_addresses
# when doing IP takeover you also must specify what network interface
# to use for the public addresses
# there is no default
# PUBLIC_INTERFACE=eth0
# where to log messages
# the default is /var/log/log.ctdb
# LOGFILE=/var/log/log.ctdb
# what debug level to run at. Higher means more verbose
# the default is 0
# DEBUGLEVEL=0
# use this to specify any local tcp ports to wait on before starting
# ctdb. Use 445 and 139 for Samba
# the default is not to wait for any local services
# CTDB_WAIT_TCP_PORTS="445 139"
# any other options you might want. Run ctdbd --help for a list
# CTDB_OPTIONS=

View File

@ -1,113 +0,0 @@
# RPM spec for the ctdb package.
# initdir is the directory the init script gets installed into.
%define initdir %{_sysconfdir}/init.d
Summary: Clustered TDB
Vendor: Samba Team
Packager: Samba Team <samba@samba.org>
Name: ctdb
Version: 1.0
Release: 8
Epoch: 0
License: GNU GPL version 3
Group: System Environment/Daemons
URL: http://ctdb.samba.org/
Source: ctdb-%{version}.tar.gz
Prereq: /sbin/chkconfig /bin/mktemp /usr/bin/killall
Prereq: fileutils sed /etc/init.d
Provides: ctdb = %{version}
Prefix: /usr
BuildRoot: %{_tmppath}/%{name}-%{version}-root
%description
ctdb is the clustered database used by samba
#######################################################################
%prep
%setup -q
# setup the init script and sysconfig file
%setup -T -D -n ctdb-%{version} -q
%build
# configure and build; the configure script is regenerated every time
CC="gcc"
## always run autogen.sh
./autogen.sh
CFLAGS="$RPM_OPT_FLAGS $EXTRA -D_GNU_SOURCE" ./configure \
--prefix=%{_prefix} \
--sysconfdir=%{_sysconfdir} \
--mandir=%{_mandir} \
--localstatedir="/var"
make showflags
make
%install
# Clean up in case there is trash left from a previous build
rm -rf $RPM_BUILD_ROOT
# Create the target build directory hierarchy
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/init.d
make DESTDIR=$RPM_BUILD_ROOT install
# sysconfig file and init script are installed by hand from config/
install -m644 config/ctdb.sysconfig $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ctdb
install -m755 config/ctdb.init $RPM_BUILD_ROOT%{initdir}/ctdb
# Remove "*.old" files
find $RPM_BUILD_ROOT -name "*.old" -exec rm -f {} \;
%clean
rm -rf $RPM_BUILD_ROOT
%post
# register the init script, but only where chkconfig is available
[ -x /sbin/chkconfig ] && /sbin/chkconfig --add ctdb
%preun
# only when the scriptlet argument is 0 (presumably final removal rather
# than upgrade, per the usual RPM scriptlet convention): unregister the
# init script where chkconfig is available
if [ $1 = 0 ] ; then
[ -x /sbin/chkconfig ] && /sbin/chkconfig --del ctdb
fi
exit 0
%postun
# when the scriptlet argument is at least 1, restart the service through
# its init script, discarding all output
if [ "$1" -ge "1" ]; then
%{initdir}/ctdb restart >/dev/null 2>&1
fi
#######################################################################
## Files section ##
#######################################################################
%files
%defattr(-,root,root)
%config(noreplace) %{_sysconfdir}/sysconfig/ctdb
%attr(755,root,root) %config %{initdir}/ctdb
%{_sysconfdir}/ctdb/events
%{_sysconfdir}/ctdb/functions
%{_sysconfdir}/ctdb/events.d/10.interface
%{_sysconfdir}/ctdb/events.d/40.vsftpd
%{_sysconfdir}/ctdb/events.d/50.samba
%{_sysconfdir}/ctdb/events.d/60.nfs
%{_sysconfdir}/ctdb/events.d/61.nfstickle
%{_sysconfdir}/ctdb/statd-callout
%{_sbindir}/ctdbd
%{_bindir}/ctdb
%{_bindir}/smnotify
%{_bindir}/onnode.ssh
%{_bindir}/onnode.rsh
%{_bindir}/onnode
%{_mandir}/man1/ctdb.1.gz
%{_mandir}/man1/ctdbd.1.gz
%{_mandir}/man1/onnode.1.gz
%{_includedir}/ctdb.h
%{_includedir}/ctdb_private.h

View File

@ -1,89 +0,0 @@
#!/bin/sh
# Copyright (C) John H Terpstra 1998-2002
# Gerald (Jerry) Carter 2003
# Jim McDonough 2007
# Andrew Tridgell 2007
# The following allows environment variables to override the target directories
# the alternative is to have a file in your home directory called .rpmmacros
# containing the following:
# %_topdir /home/mylogin/redhat
#
# Note: Under this directory rpm expects to find the same directories that are under the
# /usr/src/redhat directory
#
# Build source and binary RPMs of ctdb from a source checkout.
# Usage: run from the top of the ctdb tree; $1 (optional) is passed through
# to rpmbuild as extra options.
EXTRA_OPTIONS="$1"

[ -d packaging ] || {
	echo "Must run this from the ctdb directory"
	exit 1
}

SPECDIR=`rpm --eval %_specdir`
SRCDIR=`rpm --eval %_sourcedir`

# At this point the SPECDIR and SRCDIR variables must have a value!

USERID=`id -u`
GRPID=`id -g`
VERSION='1.0'
REVISION=''
SPECFILE="ctdb.spec"
RPMVER=`rpm --version | awk '{print $3}'`
RPMBUILD="rpmbuild"

##
## Check the RPM version (paranoid)
##
case $RPMVER in
    4*)
	echo "Supported RPM version [$RPMVER]"
	;;
    *)
	echo "Unknown RPM version: `rpm --version`"
	exit 1
	;;
esac

if [ -f Makefile ]; then
	make distclean
fi

# Remember the source directory so we can return to it after creating the
# tarball from the parent directory. pushd/popd are not used because they
# are bash builtins and this script runs under /bin/sh.
TOPDIR=$PWD
BASEDIR=`basename "$PWD"`
cd ..
chown -R ${USERID}:${GRPID} "$BASEDIR"
if [ ! -d ctdb-${VERSION} ]; then
	# the tarball must unpack into ctdb-VERSION; use a temporary symlink
	ln -s "$BASEDIR" ctdb-${VERSION} || exit 1
	REMOVE_LN=$PWD/ctdb-$VERSION
fi
echo -n "Creating ctdb-${VERSION}.tar.gz ... "
tar --exclude=.bzr --exclude .bzrignore --exclude="*~" -cf - ctdb-${VERSION}/. | gzip -9 --rsyncable > ${SRCDIR}/ctdb-${VERSION}.tar.gz
# The status must be tested directly after the pipeline: testing it after
# an echo would only ever see the echo's own (successful) status.
# Note that $? is the status of the last command of the pipeline (gzip).
if [ $? -ne 0 ]; then
	echo "Build failed!"
	[ -n "${REMOVE_LN}" ] && rm "$REMOVE_LN"
	exit 1
fi
echo "Done."

cd "$TOPDIR" || exit 1

##
## copy additional source files
##
cp -p packaging/RPM/ctdb.spec ${SPECDIR}

##
## Build
##
echo "$(basename $0): Getting Ready to build release package"
cd ${SPECDIR}
${RPMBUILD} -ba --clean --rmsource $EXTRA_OPTIONS $SPECFILE || exit 1

echo "$(basename $0): Done."

# remove the temporary symlink if we created one
[ -n "${REMOVE_LN}" ] && /bin/rm -f "$REMOVE_LN"

exit 0

View File

@ -1,737 +0,0 @@
/*
ctdb_call protocol code
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
see http://wiki.samba.org/index.php/Samba_%26_Clustering for
protocol design and packet details
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
/*
  look up an attached database by its db id

  Walks ctdb->db_list and returns the first context whose db_id equals
  id, or NULL when the list contains no such database.
*/
struct ctdb_db_context *find_ctdb_db(struct ctdb_context *ctdb, uint32_t id)
{
	struct ctdb_db_context *db = ctdb->db_list;

	while (db != NULL && db->db_id != id) {
		db = db->next;
	}
	return db;
}
/*
  a variant of input packet that can be used in lock requeue:
  the private pointer is the ctdb context, and the packet is simply
  fed back into ctdb_input_pkt()
*/
static void ctdb_call_input_pkt(void *p, struct ctdb_req_header *hdr)
{
	ctdb_input_pkt(talloc_get_type(p, struct ctdb_context), hdr);
}
/*
send an error reply

Formats the printf style message, builds a CTDB_REPLY_ERROR packet
addressed to the node the request came from (with the request's reqid),
carrying the given status and the message including its terminating NUL,
and queues it.
*/
static void ctdb_send_error(struct ctdb_context *ctdb,
struct ctdb_req_header *hdr, uint32_t status,
const char *fmt, ...) PRINTF_ATTRIBUTE(4,5);
static void ctdb_send_error(struct ctdb_context *ctdb,
struct ctdb_req_header *hdr, uint32_t status,
const char *fmt, ...)
{
va_list ap;
struct ctdb_reply_error *r;
char *msg;
int msglen, len;
va_start(ap, fmt);
msg = talloc_vasprintf(ctdb, fmt, ap);
if (msg == NULL) {
ctdb_fatal(ctdb, "Unable to allocate error in ctdb_send_error\n");
}
va_end(ap);
/* the message is sent with its terminating NUL */
msglen = strlen(msg)+1;
/* size of the fixed part of the packet, up to the message bytes */
len = offsetof(struct ctdb_reply_error, msg);
/* msg is passed as the memory context of the packet, so freeing msg
at the end presumably releases the packet as well */
r = ctdb_transport_allocate(ctdb, msg, CTDB_REPLY_ERROR, len + msglen,
struct ctdb_reply_error);
CTDB_NO_MEMORY_FATAL(ctdb, r);
/* reply to the sender of the request, using its request id */
r->hdr.destnode = hdr->srcnode;
r->hdr.reqid = hdr->reqid;
r->status = status;
r->msglen = msglen;
memcpy(&r->msg[0], msg, msglen);
ctdb_queue_packet(ctdb, &r->hdr);
talloc_free(msg);
}
/*
  send a redirect reply

  The call packet is forwarded to the dmaster recorded in our copy of
  the header. The exception: when we are not the lmaster ourselves and
  the hop count is a multiple of the max_redirect_count tunable, it is
  forwarded to the lmaster instead. The hop count is incremented before
  the packet is queued.
*/
static void ctdb_call_send_redirect(struct ctdb_context *ctdb,
				    TDB_DATA key,
				    struct ctdb_req_call *c,
				    struct ctdb_ltdb_header *header)
{
	uint32_t lmaster = ctdb_lmaster(ctdb, &key);

	if (ctdb->vnn != lmaster &&
	    (c->hopcount % ctdb->tunable.max_redirect_count) == 0) {
		c->hdr.destnode = lmaster;
	} else {
		c->hdr.destnode = header->dmaster;
	}

	c->hopcount++;
	ctdb_queue_packet(ctdb, &c->hdr);
}
/*
send a dmaster reply
caller must have the chainlock before calling this routine. Caller must be
the lmaster

The local copy of the record is first stored with new_dmaster as its
dmaster, then a CTDB_REPLY_DMASTER packet carrying the key followed by
the data is queued to new_dmaster. If this node is not the lmaster for
the key the function only logs and returns.
*/
static void ctdb_send_dmaster_reply(struct ctdb_db_context *ctdb_db,
struct ctdb_ltdb_header *header,
TDB_DATA key, TDB_DATA data,
uint32_t new_dmaster,
uint32_t reqid)
{
struct ctdb_context *ctdb = ctdb_db->ctdb;
struct ctdb_reply_dmaster *r;
int ret, len;
TALLOC_CTX *tmp_ctx;
if (ctdb->vnn != ctdb_lmaster(ctdb, &key)) {
DEBUG(0,(__location__ " Caller is not lmaster!\n"));
return;
}
/* record the new dmaster locally before telling the new dmaster */
header->dmaster = new_dmaster;
ret = ctdb_ltdb_store(ctdb_db, key, header, data);
if (ret != 0) {
ctdb_fatal(ctdb, "ctdb_req_dmaster unable to update dmaster");
return;
}
/* put the packet on a temporary context, allowing us to safely free
it below even if ctdb_reply_dmaster() has freed it already */
tmp_ctx = talloc_new(ctdb);
/* send the CTDB_REPLY_DMASTER */
len = offsetof(struct ctdb_reply_dmaster, data) + key.dsize + data.dsize;
r = ctdb_transport_allocate(ctdb, tmp_ctx, CTDB_REPLY_DMASTER, len,
struct ctdb_reply_dmaster);
CTDB_NO_MEMORY_FATAL(ctdb, r);
r->hdr.destnode = new_dmaster;
r->hdr.reqid = reqid;
r->rsn = header->rsn;
r->keylen = key.dsize;
r->datalen = data.dsize;
r->db_id = ctdb_db->db_id;
/* payload layout: key bytes immediately followed by the data bytes */
memcpy(&r->data[0], key.dptr, key.dsize);
memcpy(&r->data[key.dsize], data.dptr, data.dsize);
ctdb_queue_packet(ctdb, &r->hdr);
talloc_free(tmp_ctx);
}
/*
send a dmaster request (give another node the dmaster for a record)
This is always sent to the lmaster, which ensures that the lmaster
always knows who the dmaster is. The lmaster will then send a
CTDB_REPLY_DMASTER to the new dmaster

The new dmaster is the source node of the call c. When this node is
itself the lmaster for the key, the request packet is skipped and the
dmaster reply is sent directly.
*/
static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db,
struct ctdb_req_call *c,
struct ctdb_ltdb_header *header,
TDB_DATA *key, TDB_DATA *data)
{
struct ctdb_req_dmaster *r;
struct ctdb_context *ctdb = ctdb_db->ctdb;
int len;
uint32_t lmaster = ctdb_lmaster(ctdb, key);
/* we are the lmaster: no need to send a request to ourselves */
if (lmaster == ctdb->vnn) {
ctdb_send_dmaster_reply(ctdb_db, header, *key, *data,
c->hdr.srcnode, c->hdr.reqid);
return;
}
len = offsetof(struct ctdb_req_dmaster, data) + key->dsize + data->dsize;
r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_DMASTER, len,
struct ctdb_req_dmaster);
CTDB_NO_MEMORY_FATAL(ctdb, r);
r->hdr.destnode = lmaster;
r->hdr.reqid = c->hdr.reqid;
r->db_id = c->db_id;
r->rsn = header->rsn;
r->dmaster = c->hdr.srcnode;
r->keylen = key->dsize;
r->datalen = data->dsize;
/* payload layout: key bytes immediately followed by the data bytes */
memcpy(&r->data[0], key->dptr, key->dsize);
memcpy(&r->data[key->dsize], data->dptr, data->dsize);
/* update our own copy of the record to name the new dmaster before
the request goes out */
header->dmaster = c->hdr.srcnode;
if (ctdb_ltdb_store(ctdb_db, *key, header, *data) != 0) {
ctdb_fatal(ctdb, "Failed to store record in ctdb_call_send_dmaster");
}
ctdb_queue_packet(ctdb, &r->hdr);
talloc_free(r);
}
/*
called when a CTDB_REPLY_DMASTER packet comes in, or when the lmaster
gets a CTDB_REQUEST_DMASTER for itself. We become the dmaster.
must be called with the chainlock held. This function releases the chainlock

The record is stored with a fresh header (rsn + 1, dmaster set to this
node), then the call waiting under reqid is run locally, marked
CTDB_CALL_DONE and its async callback (if any) is invoked.
*/
static void ctdb_become_dmaster(struct ctdb_db_context *ctdb_db,
uint32_t reqid, TDB_DATA key, TDB_DATA data,
uint64_t rsn)
{
struct ctdb_call_state *state;
struct ctdb_context *ctdb = ctdb_db->ctdb;
struct ctdb_ltdb_header header;
DEBUG(2,("vnn %u dmaster response %08x\n", ctdb->vnn, ctdb_hash(&key)));
/* start from a zeroed header; only rsn and dmaster are set */
ZERO_STRUCT(header);
header.rsn = rsn + 1;
header.dmaster = ctdb->vnn;
if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) {
ctdb_fatal(ctdb, "ctdb_reply_dmaster store failed\n");
ctdb_ltdb_unlock(ctdb_db, key);
return;
}
/* find the call that triggered the migration */
state = ctdb_reqid_find(ctdb, reqid, struct ctdb_call_state);
if (state == NULL) {
DEBUG(0,("vnn %u Invalid reqid %u in ctdb_become_dmaster\n",
ctdb->vnn, reqid));
ctdb_ltdb_unlock(ctdb_db, key);
return;
}
if (reqid != state->reqid) {
/* we found a record but it was the wrong one */
DEBUG(0, ("Dropped orphan in ctdb_become_dmaster with reqid:%u\n",reqid));
ctdb_ltdb_unlock(ctdb_db, key);
return;
}
/* we hold the record now, so the call can be run locally */
ctdb_call_local(ctdb_db, &state->call, &header, state, &data, ctdb->vnn);
ctdb_ltdb_unlock(ctdb_db, state->call.key);
state->state = CTDB_CALL_DONE;
if (state->async.fn) {
state->async.fn(state);
}
}
/*
called when a CTDB_REQ_DMASTER packet comes in
this comes into the lmaster for a record when the current dmaster
wants to give up the dmaster role and give it to someone else

The key and the record data are taken from the packet payload (key
first, data after it). The locally stored record is fetched under the
chainlock only to validate the sender against its header.
*/
void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
struct ctdb_req_dmaster *c = (struct ctdb_req_dmaster *)hdr;
TDB_DATA key, data, data2;
struct ctdb_ltdb_header header;
struct ctdb_db_context *ctdb_db;
int ret;
/* NOTE(review): keylen and datalen come straight from the packet and
are not checked against the packet length in this function */
key.dptr = c->data;
key.dsize = c->keylen;
data.dptr = c->data + c->keylen;
data.dsize = c->datalen;
ctdb_db = find_ctdb_db(ctdb, c->db_id);
if (!ctdb_db) {
ctdb_send_error(ctdb, hdr, -1,
"Unknown database in request. db_id==0x%08x",
c->db_id);
return;
}
/* fetch the current record */
ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header, hdr, &data2,
ctdb_call_input_pkt, ctdb, False);
if (ret == -1) {
ctdb_fatal(ctdb, "ctdb_req_dmaster failed to fetch record");
return;
}
/* -2: the packet was deferred (see debug message); presumably it is
requeued through ctdb_call_input_pkt once the lock is available */
if (ret == -2) {
DEBUG(2,(__location__ " deferring ctdb_request_dmaster\n"));
return;
}
if (ctdb_lmaster(ctdb, &key) != ctdb->vnn) {
DEBUG(0,("vnn %u dmaster request to non-lmaster lmaster=%u gen=%u curgen=%u\n",
ctdb->vnn, ctdb_lmaster(ctdb, &key),
hdr->generation, ctdb->vnn_map->generation));
ctdb_fatal(ctdb, "ctdb_req_dmaster to non-lmaster");
}
DEBUG(2,("vnn %u dmaster request on %08x for %u from %u\n",
ctdb->vnn, ctdb_hash(&key), c->dmaster, c->hdr.srcnode));
/* its a protocol error if the sending node is not the current dmaster */
if (header.dmaster != hdr->srcnode) {
DEBUG(0,("vnn %u dmaster request non-master %u dmaster=%u key %08x dbid 0x%08x gen=%u curgen=%u\n",
ctdb->vnn, hdr->srcnode, header.dmaster, ctdb_hash(&key),
ctdb_db->db_id, hdr->generation, ctdb->vnn_map->generation));
ctdb_fatal(ctdb, "ctdb_req_dmaster from non-master");
return;
}
/* use the rsn from the sending node */
header.rsn = c->rsn;
/* check if the new dmaster is the lmaster, in which case we
skip the dmaster reply */
if (c->dmaster == ctdb->vnn) {
/* ctdb_become_dmaster() releases the chainlock itself */
ctdb_become_dmaster(ctdb_db, hdr->reqid, key, data, c->rsn);
} else {
ctdb_send_dmaster_reply(ctdb_db, &header, key, data, c->dmaster, hdr->reqid);
ctdb_ltdb_unlock(ctdb_db, key);
}
}
/*
called when a CTDB_REQ_CALL packet comes in

Depending on the state of the record the request is either
- redirected, when this node is not the dmaster of the record,
- answered by migrating the record to the requesting node, or
- executed locally, with the result sent back in a CTDB_REPLY_CALL.
*/
void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
struct ctdb_req_call *c = (struct ctdb_req_call *)hdr;
TDB_DATA data;
struct ctdb_reply_call *r;
int ret, len;
struct ctdb_ltdb_header header;
struct ctdb_call call;
struct ctdb_db_context *ctdb_db;
ctdb_db = find_ctdb_db(ctdb, c->db_id);
if (!ctdb_db) {
ctdb_send_error(ctdb, hdr, -1,
"Unknown database in request. db_id==0x%08x",
c->db_id);
return;
}
/* the payload holds the key followed by the call data */
call.call_id = c->callid;
call.key.dptr = c->data;
call.key.dsize = c->keylen;
call.call_data.dptr = c->data + c->keylen;
call.call_data.dsize = c->calldatalen;
/* determine if we are the dmaster for this key. This also
fetches the record data (if any), thus avoiding a 2nd fetch of the data
if the call will be answered locally */
ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, call.key, &header, hdr, &data,
ctdb_call_input_pkt, ctdb, False);
if (ret == -1) {
ctdb_send_error(ctdb, hdr, ret, "ltdb fetch failed in ctdb_request_call");
return;
}
/* -2: the packet was deferred (see debug message); presumably it is
requeued through ctdb_call_input_pkt once the lock is available */
if (ret == -2) {
DEBUG(2,(__location__ " deferred ctdb_request_call\n"));
return;
}
/* if we are not the dmaster, then send a redirect to the
requesting node */
if (header.dmaster != ctdb->vnn) {
talloc_free(data.dptr);
ctdb_call_send_redirect(ctdb, call.key, c, &header);
ctdb_ltdb_unlock(ctdb_db, call.key);
return;
}
/* keep track of the largest hop count seen so far */
if (c->hopcount > ctdb->statistics.max_hop_count) {
ctdb->statistics.max_hop_count = c->hopcount;
}
/* if this nodes has done enough consecutive calls on the same record
then give them the record
or if the node requested an immediate migration
*/
if ( c->hdr.srcnode != ctdb->vnn &&
((header.laccessor == c->hdr.srcnode
&& header.lacount >= ctdb->tunable.max_lacount)
|| (c->flags & CTDB_IMMEDIATE_MIGRATION)) ) {
DEBUG(2,("vnn %u starting migration of %08x to %u\n",
ctdb->vnn, ctdb_hash(&call.key), c->hdr.srcnode));
ctdb_call_send_dmaster(ctdb_db, c, &header, &call.key, &data);
talloc_free(data.dptr);
ctdb_ltdb_unlock(ctdb_db, call.key);
return;
}
/* we are the dmaster and keep the record: run the call here */
ctdb_call_local(ctdb_db, &call, &header, hdr, &data, c->hdr.srcnode);
ctdb_ltdb_unlock(ctdb_db, call.key);
/* build the reply: fixed part plus the reply data produced by the call */
len = offsetof(struct ctdb_reply_call, data) + call.reply_data.dsize;
r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CALL, len,
struct ctdb_reply_call);
CTDB_NO_MEMORY_FATAL(ctdb, r);
r->hdr.destnode = hdr->srcnode;
r->hdr.reqid = hdr->reqid;
r->status = call.status;
r->datalen = call.reply_data.dsize;
if (call.reply_data.dsize) {
memcpy(&r->data[0], call.reply_data.dptr, call.reply_data.dsize);
}
ctdb_queue_packet(ctdb, &r->hdr);
talloc_free(r);
}
/*
  called when a CTDB_REPLY_CALL packet comes in

  This packet comes in response to a CTDB_REQ_CALL request packet. It
  contains any reply data from the call. The reply is attached to the
  call state waiting under the packet's reqid, the state is marked done
  and its async callback, if any, is run.
*/
void ctdb_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_reply_call *reply = (struct ctdb_reply_call *)hdr;
	struct ctdb_call_state *state;

	state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_call_state);
	if (state == NULL) {
		DEBUG(0, (__location__ " reqid %u not found\n", hdr->reqid));
		return;
	}

	if (state->reqid != hdr->reqid) {
		/* we found a record but it was the wrong one */
		DEBUG(0, ("Dropped orphaned call reply with reqid:%u\n",hdr->reqid));
		return;
	}

	/* the reply data points into the packet ... */
	state->call.status           = reply->status;
	state->call.reply_data.dsize = reply->datalen;
	state->call.reply_data.dptr  = reply->data;

	/* ... so the packet is moved under the call state */
	talloc_steal(state, reply);

	state->state = CTDB_CALL_DONE;
	if (state->async.fn != NULL) {
		state->async.fn(state);
	}
}
/*
  called when a CTDB_REPLY_DMASTER packet comes in

  This packet comes in from the lmaster response to a CTDB_REQ_CALL
  request packet. It means that the current dmaster wants to give us
  the dmaster role. The key and data are taken from the packet payload
  (key first), the record is locked and ctdb_become_dmaster() does the
  rest.
*/
void ctdb_reply_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_reply_dmaster *reply = (struct ctdb_reply_dmaster *)hdr;
	struct ctdb_db_context *ctdb_db = find_ctdb_db(ctdb, reply->db_id);
	TDB_DATA key, data;
	int ret;

	if (ctdb_db == NULL) {
		DEBUG(0,("Unknown db_id 0x%x in ctdb_reply_dmaster\n", reply->db_id));
		return;
	}

	key.dptr   = reply->data;
	key.dsize  = reply->keylen;
	data.dptr  = &reply->data[key.dsize];
	data.dsize = reply->datalen;

	ret = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr,
				     ctdb_call_input_pkt, ctdb, False);
	if (ret != 0) {
		/* -2 is not reported; every other failure is logged */
		if (ret != -2) {
			DEBUG(0,(__location__ " Failed to get lock in ctdb_reply_dmaster\n"));
		}
		return;
	}

	ctdb_become_dmaster(ctdb_db, hdr->reqid, key, data, reply->rsn);
}
/*
  Handle an incoming CTDB_REPLY_ERROR packet.

  The pending call identified by hdr->reqid is moved to the error state,
  its error message is pointed at the message carried in the packet and
  the async callback (if one was set) is invoked.
*/
void ctdb_reply_error(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_reply_error *err = (struct ctdb_reply_error *)hdr;
	struct ctdb_call_state *pending =
		ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_call_state);

	if (pending == NULL) {
		DEBUG(0,("vnn %u Invalid reqid %u in ctdb_reply_error\n",
			 ctdb->vnn, hdr->reqid));
		return;
	}

	if (pending->reqid != hdr->reqid) {
		/* the lookup gave us a call state, but not the one this reply is for */
		DEBUG(0, ("Dropped orphaned error reply with reqid:%u\n",hdr->reqid));
		return;
	}

	/* errmsg will point into the packet, so the call state must own it */
	talloc_steal(pending, err);

	pending->errmsg = (char *)err->msg;
	pending->state  = CTDB_CALL_ERROR;

	if (pending->async.fn != NULL) {
		pending->async.fn(pending);
	}
}
/*
destroy a ctdb_call
*/
static int ctdb_call_destructor(struct ctdb_call_state *state)
{
DLIST_REMOVE(state->ctdb_db->ctdb->pending_calls, state);
ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
return 0;
}
/*
called when a ctdb_call needs to be resent after a reconfigure event
*/
static void ctdb_call_resend(struct ctdb_call_state *state)
{
struct ctdb_context *ctdb = state->ctdb_db->ctdb;
state->generation = ctdb->vnn_map->generation;
/* use a new reqid, in case the old reply does eventually come in */
ctdb_reqid_remove(ctdb, state->reqid);
state->reqid = ctdb_reqid_new(ctdb, state);
state->c->hdr.reqid = state->reqid;
/* update the generation count for this request, so its valid with the new vnn_map */
state->c->hdr.generation = state->generation;
/* send the packet to ourselves, it will be redirected appropriately */
state->c->hdr.destnode = ctdb->vnn;
ctdb_queue_packet(ctdb, &state->c->hdr);
DEBUG(0,("resent ctdb_call\n"));
}
/*
resend all pending calls on recovery
*/
void ctdb_call_resend_all(struct ctdb_context *ctdb)
{
struct ctdb_call_state *state, *next;
for (state=ctdb->pending_calls;state;state=next) {
next = state->next;
ctdb_call_resend(state);
}
}
/*
this allows the caller to setup a async.fn
*/
static void call_local_trigger(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ctdb_call_state *state = talloc_get_type(private_data, struct ctdb_call_state);
if (state->async.fn) {
state->async.fn(state);
}
}
/*
  construct an event driven local ctdb_call

  this is used so that locally processed ctdb_call requests are processed
  in an event driven manner

  The call is run immediately through ctdb_call_local(); the returned state
  is already marked CTDB_CALL_DONE and a zero-timeout timed event is queued
  so that async.fn (which the caller sets after this function returns) is
  invoked from the event loop.

  ctdb_db - database the call operates on; talloc parent of the new state
  call    - the call to run; copied into the state
  header  - ltdb header of the record, passed to ctdb_call_local()
  data    - record data; data->dptr is made a talloc child of the state

  Returns the new call state, or NULL if the state could not be allocated.
*/
struct ctdb_call_state *ctdb_call_local_send(struct ctdb_db_context *ctdb_db, 
					     struct ctdb_call *call,
					     struct ctdb_ltdb_header *header,
					     TDB_DATA *data)
{
	struct ctdb_call_state *state;
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	int ret;

	state = talloc_zero(ctdb_db, struct ctdb_call_state);
	CTDB_NO_MEMORY_NULL(ctdb, state);

	/* the record data now lives and dies with the call state */
	talloc_steal(state, data->dptr);

	state->state = CTDB_CALL_DONE;
	state->call = *call;
	state->ctdb_db = ctdb_db;

	ret = ctdb_call_local(ctdb_db, &state->call, header, state, data, ctdb->vnn);
	if (ret != 0) {
		/* the call is still completed as CTDB_CALL_DONE, exactly as
		   before, but the failure is no longer silent */
		DEBUG(0,(__location__ " ctdb_call_local() failed with %d, ignoring\n", ret));
	}

	event_add_timed(ctdb->ev, state, timeval_zero(), call_local_trigger, state);

	return state;
}
/*
  make a remote ctdb call - async send. Called in daemon context.

  This constructs a ctdb_call request and queues it for processing.
  This call never blocks.

  ctdb_db - database the call operates on; talloc parent of the new state
  call    - the call to send; its key and call data are copied into the
            request packet
  header  - ltdb header of the record; header->dmaster is the destination

  Returns the new call state (in CTDB_CALL_WAIT, linked into
  ctdb->pending_calls), or NULL on allocation failure.
*/
struct ctdb_call_state *ctdb_daemon_call_send_remote(struct ctdb_db_context *ctdb_db, 
						     struct ctdb_call *call, 
						     struct ctdb_ltdb_header *header)
{
	uint32_t len;
	struct ctdb_call_state *state;
	struct ctdb_req_call *c;
	struct ctdb_context *ctdb = ctdb_db->ctdb;

	state = talloc_zero(ctdb_db, struct ctdb_call_state);
	CTDB_NO_MEMORY_NULL(ctdb, state);
	state->reqid = ctdb_reqid_new(ctdb, state);
	state->ctdb_db = ctdb_db;

	len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
	c = ctdb_transport_allocate(ctdb, state, CTDB_REQ_CALL, len, 
				    struct ctdb_req_call);
	if (c == NULL) {
		/* give back the reqid and the state instead of leaking both;
		   the macro below then reports the error and returns NULL */
		ctdb_reqid_remove(ctdb, state->reqid);
		talloc_free(state);
	}
	CTDB_NO_MEMORY_NULL(ctdb, c);
	state->c = c;

	/* from here on the destructor takes care of the reqid and of the
	   pending_calls list */
	talloc_set_destructor(state, ctdb_call_destructor);

	state->c->hdr.destnode  = header->dmaster;

	/* this limits us to 16k outstanding messages - not unreasonable */
	state->c->hdr.reqid     = state->reqid;
	state->c->flags         = call->flags;
	state->c->db_id         = ctdb_db->db_id;
	state->c->callid        = call->call_id;
	state->c->hopcount      = 0;
	state->c->keylen        = call->key.dsize;
	state->c->calldatalen   = call->call_data.dsize;

	/* payload layout: key, immediately followed by the call data */
	memcpy(&state->c->data[0], call->key.dptr, call->key.dsize);
	memcpy(&state->c->data[call->key.dsize], 
	       call->call_data.dptr, call->call_data.dsize);

	/* keep a copy of the call whose key and call data point into the
	   packet, so it does not depend on the caller's buffers */
	state->call                = *call;
	state->call.call_data.dptr = &state->c->data[call->key.dsize];
	state->call.key.dptr       = &state->c->data[0];

	state->state  = CTDB_CALL_WAIT;
	state->generation = ctdb->vnn_map->generation;

	DLIST_ADD(ctdb->pending_calls, state);

	ctdb_queue_packet(ctdb, &state->c->hdr);

	return state;
}
/*
  make a remote ctdb call - async recv - called in daemon context

  This is called when the program wants to wait for a ctdb_call to complete and get the 
  results. This call will block (running the event loop) unless the call has
  already completed.

  state - the call state returned by one of the send functions; it is
          always freed before this function returns
  call  - receives the status and a copy of the reply data. The copy is
          allocated on the ctdb context, so the caller has to free
          call->reply_data.dptr itself.

  Returns 0 on success, -1 if the call failed or the reply data could not
  be copied (the ctdb error string is set in both cases).
*/
int ctdb_daemon_call_recv(struct ctdb_call_state *state, struct ctdb_call *call)
{
	while (state->state < CTDB_CALL_DONE) {
		event_loop_once(state->ctdb_db->ctdb->ev);
	}
	if (state->state != CTDB_CALL_DONE) {
		ctdb_set_error(state->ctdb_db->ctdb, "%s", state->errmsg);
		talloc_free(state);
		return -1;
	}

	if (state->call.reply_data.dsize) {
		call->reply_data.dptr = talloc_memdup(state->ctdb_db->ctdb,
						      state->call.reply_data.dptr,
						      state->call.reply_data.dsize);
		if (call->reply_data.dptr == NULL) {
			/* never hand out a NULL pointer with a non-zero size */
			call->reply_data.dsize = 0;
			ctdb_set_error(state->ctdb_db->ctdb,
				       "Out of memory copying reply data in ctdb_daemon_call_recv");
			talloc_free(state);
			return -1;
		}
		call->reply_data.dsize = state->call.reply_data.dsize;
	} else {
		call->reply_data.dptr = NULL;
		call->reply_data.dsize = 0;
	}
	call->status = state->call.status;
	talloc_free(state);
	return 0;
}
/*
  build a CTDB_REQ_KEEPALIVE packet for destnode, count it in the
  statistics and queue it for sending
*/
void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
{
	struct ctdb_req_keepalive *pkt;

	pkt = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
				      sizeof(*pkt),
				      struct ctdb_req_keepalive);
	CTDB_NO_MEMORY_FATAL(ctdb, pkt);

	/* reqid is set to 0 for keepalives */
	pkt->hdr.reqid    = 0;
	pkt->hdr.destnode = destnode;

	ctdb->statistics.keepalive_packets_sent++;

	ctdb_queue_packet(ctdb, &pkt->hdr);

	/* the packet is released again right after it has been queued */
	talloc_free(pkt);
}

View File

@ -1,498 +0,0 @@
/*
ctdb_control protocol code
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/util/dlinklist.h"
/*
  state kept for one outstanding control request sent with
  ctdb_daemon_send_control(). It is registered under reqid so that
  ctdb_reply_control() can find it again when the reply arrives.
*/
struct ctdb_control_state {
/* daemon context; the destructor uses it to release reqid */
struct ctdb_context *ctdb;
/* request id allocated with ctdb_reqid_new() and copied into the packet header */
uint32_t reqid;
/* invoked with the reply, or with status -1 and an error string on timeout */
ctdb_control_callback_fn_t callback;
/* opaque pointer handed back to callback */
void *private_data;
/* copy of the flags argument given to ctdb_daemon_send_control() */
unsigned flags;
};
/*
process a control request

Dispatches on c->opcode. A number of simple controls are answered
inline; everything else is forwarded to the matching ctdb_control_*(),
ctdb_ltdb_*() or daemon_*() handler.

ctdb        - daemon context
c           - the control request packet (opcode, srvid and client_id are read from it)
indata      - payload of the request
outdata     - filled in by the controls that return data
srcnode     - node the request came from; passed on to some handlers
errormsg    - passed on to ctdb_control_set_recmode()
async_reply - passed on to the freeze, set_recmode, takeover_ip and
              release_ip handlers; the caller (ctdb_request_control) sends
              no reply itself when this ends up set

Returns the status value for the reply; -1 for an unknown opcode.

NOTE(review): CHECK_CONTROL_DATA_SIZE is a macro defined outside this
chunk; presumably it makes this function return an error when indata.dsize
differs from the given size - confirm against its definition.
*/
static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,
TDB_DATA *outdata, uint32_t srcnode,
const char **errormsg,
bool *async_reply)
{
uint32_t opcode = c->opcode;
uint64_t srvid = c->srvid;
uint32_t client_id = c->client_id;
switch (opcode) {
case CTDB_CONTROL_PROCESS_EXISTS: {
CHECK_CONTROL_DATA_SIZE(sizeof(pid_t));
/* signal 0 only probes for the process; the result of kill() is the status */
return kill(*(pid_t *)indata.dptr, 0);
}
case CTDB_CONTROL_SET_DEBUG: {
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
LogLevel = *(uint32_t *)indata.dptr;
return 0;
}
case CTDB_CONTROL_GET_DEBUG: {
CHECK_CONTROL_DATA_SIZE(0);
/* outdata points straight at the LogLevel variable, it is not a copy */
outdata->dptr = (uint8_t *)&LogLevel;
outdata->dsize = sizeof(LogLevel);
return 0;
}
case CTDB_CONTROL_STATISTICS: {
CHECK_CONTROL_DATA_SIZE(0);
/* refresh the computed fields, then hand out the live statistics struct */
ctdb->statistics.memory_used = talloc_total_size(ctdb);
ctdb->statistics.frozen = (ctdb->freeze_mode == CTDB_FREEZE_FROZEN);
ctdb->statistics.recovering = (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE);
outdata->dptr = (uint8_t *)&ctdb->statistics;
outdata->dsize = sizeof(ctdb->statistics);
return 0;
}
case CTDB_CONTROL_GET_ALL_TUNABLES: {
CHECK_CONTROL_DATA_SIZE(0);
outdata->dptr = (uint8_t *)&ctdb->tunable;
outdata->dsize = sizeof(ctdb->tunable);
return 0;
}
case CTDB_CONTROL_DUMP_MEMORY: {
CHECK_CONTROL_DATA_SIZE(0);
/* the talloc report is written to the daemon's stdout, not sent back */
talloc_report_full(ctdb, stdout);
return 0;
}
case CTDB_CONTROL_STATISTICS_RESET: {
CHECK_CONTROL_DATA_SIZE(0);
ZERO_STRUCT(ctdb->statistics);
return 0;
}
case CTDB_CONTROL_GETVNNMAP:
return ctdb_control_getvnnmap(ctdb, opcode, indata, outdata);
case CTDB_CONTROL_GET_DBMAP:
return ctdb_control_getdbmap(ctdb, opcode, indata, outdata);
case CTDB_CONTROL_GET_NODEMAP:
return ctdb_control_getnodemap(ctdb, opcode, indata, outdata);
case CTDB_CONTROL_SETVNNMAP:
return ctdb_control_setvnnmap(ctdb, opcode, indata, outdata);
case CTDB_CONTROL_PULL_DB:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_pulldb));
return ctdb_control_pull_db(ctdb, indata, outdata);
case CTDB_CONTROL_SET_DMASTER:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_set_dmaster));
return ctdb_control_set_dmaster(ctdb, indata);
case CTDB_CONTROL_PUSH_DB:
return ctdb_control_push_db(ctdb, indata);
case CTDB_CONTROL_GET_RECMODE: {
/* the recovery mode itself is returned as the status */
return ctdb->recovery_mode;
}
case CTDB_CONTROL_SET_RECMASTER: {
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
/* the recovery master may only be changed while frozen */
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(0,("Attempt to set recmaster when not frozen\n"));
return -1;
}
ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0];
return 0;
}
/* the following controls return the requested value as the status */
case CTDB_CONTROL_GET_RECMASTER:
return ctdb->recovery_master;
case CTDB_CONTROL_GET_PID:
return getpid();
case CTDB_CONTROL_GET_VNN:
return ctdb->vnn;
case CTDB_CONTROL_PING:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb->statistics.num_clients;
case CTDB_CONTROL_GET_DBNAME: {
uint32_t db_id;
struct ctdb_db_context *ctdb_db;
CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
db_id = *(uint32_t *)indata.dptr;
ctdb_db = find_ctdb_db(ctdb, db_id);
if (ctdb_db == NULL) return -1;
/* the name is returned including its terminating NUL */
outdata->dptr = discard_const(ctdb_db->db_name);
outdata->dsize = strlen(ctdb_db->db_name)+1;
return 0;
}
case CTDB_CONTROL_GETDBPATH: {
uint32_t db_id;
struct ctdb_db_context *ctdb_db;
CHECK_CONTROL_DATA_SIZE(sizeof(db_id));
db_id = *(uint32_t *)indata.dptr;
ctdb_db = find_ctdb_db(ctdb, db_id);
if (ctdb_db == NULL) return -1;
/* the path is returned including its terminating NUL */
outdata->dptr = discard_const(ctdb_db->db_path);
outdata->dsize = strlen(ctdb_db->db_path)+1;
return 0;
}
case CTDB_CONTROL_DB_ATTACH:
return ctdb_control_db_attach(ctdb, indata, outdata);
case CTDB_CONTROL_SET_CALL: {
/* sc is only dereferenced after the size check below */
struct ctdb_control_set_call *sc =
(struct ctdb_control_set_call *)indata.dptr;
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_set_call));
return ctdb_daemon_set_call(ctdb, sc->db_id, sc->fn, sc->id);
}
case CTDB_CONTROL_TRAVERSE_START:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_traverse_start));
return ctdb_control_traverse_start(ctdb, indata, outdata, srcnode);
case CTDB_CONTROL_TRAVERSE_ALL:
return ctdb_control_traverse_all(ctdb, indata, outdata);
case CTDB_CONTROL_TRAVERSE_DATA:
return ctdb_control_traverse_data(ctdb, indata, outdata);
case CTDB_CONTROL_REGISTER_SRVID:
return daemon_register_message_handler(ctdb, client_id, srvid);
case CTDB_CONTROL_DEREGISTER_SRVID:
return daemon_deregister_message_handler(ctdb, client_id, srvid);
case CTDB_CONTROL_ENABLE_SEQNUM:
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
return ctdb_ltdb_enable_seqnum(ctdb, *(uint32_t *)indata.dptr);
case CTDB_CONTROL_UPDATE_SEQNUM:
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
return ctdb_ltdb_update_seqnum(ctdb, *(uint32_t *)indata.dptr, srcnode);
case CTDB_CONTROL_FREEZE:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_freeze(ctdb, c, async_reply);
case CTDB_CONTROL_THAW:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_thaw(ctdb);
case CTDB_CONTROL_SET_RECMODE:
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
return ctdb_control_set_recmode(ctdb, c, indata, async_reply, errormsg);
case CTDB_CONTROL_SET_MONMODE:
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
ctdb->monitoring_mode = *(uint32_t *)indata.dptr;
return 0;
case CTDB_CONTROL_GET_MONMODE:
return ctdb->monitoring_mode;
case CTDB_CONTROL_SHUTDOWN:
/* this case never returns: the daemon exits after the shutdown steps */
ctdb_release_all_ips(ctdb);
ctdb->methods->shutdown(ctdb);
ctdb_event_script(ctdb, "shutdown");
DEBUG(0,("shutting down\n"));
exit(0);
case CTDB_CONTROL_MAX_RSN:
CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t));
return ctdb_control_max_rsn(ctdb, indata, outdata);
case CTDB_CONTROL_SET_RSN_NONEMPTY:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_set_rsn_nonempty));
return ctdb_control_set_rsn_nonempty(ctdb, indata, outdata);
case CTDB_CONTROL_TAKEOVER_IP:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ip));
return ctdb_control_takeover_ip(ctdb, c, indata, async_reply);
case CTDB_CONTROL_RELEASE_IP:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_public_ip));
return ctdb_control_release_ip(ctdb, c, indata, async_reply);
case CTDB_CONTROL_GET_PUBLIC_IPS:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_get_public_ips(ctdb, c, outdata);
case CTDB_CONTROL_DELETE_LOW_RSN:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_delete_low_rsn));
return ctdb_control_delete_low_rsn(ctdb, indata, outdata);
case CTDB_CONTROL_TCP_CLIENT:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp));
return ctdb_control_tcp_client(ctdb, client_id, srcnode, indata);
case CTDB_CONTROL_STARTUP:
CHECK_CONTROL_DATA_SIZE(0);
return ctdb_control_startup(ctdb, srcnode);
case CTDB_CONTROL_TCP_ADD:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp_vnn));
return ctdb_control_tcp_add(ctdb, indata);
case CTDB_CONTROL_TCP_REMOVE:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp_vnn));
return ctdb_control_tcp_remove(ctdb, indata);
case CTDB_CONTROL_SET_TUNABLE:
return ctdb_control_set_tunable(ctdb, indata);
case CTDB_CONTROL_GET_TUNABLE:
return ctdb_control_get_tunable(ctdb, indata, outdata);
case CTDB_CONTROL_LIST_TUNABLES:
return ctdb_control_list_tunables(ctdb, outdata);
case CTDB_CONTROL_MODIFY_FLAGS:
CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_node_modflags));
return ctdb_control_modflags(ctdb, indata);
default:
DEBUG(0,(__location__ " Unknown CTDB control opcode %u\n", opcode));
return -1;
}
}
/*
  Send the reply for a ctdb control.

  Nothing is sent when the request carries CTDB_CTRL_FLAG_NOREPLY.
  Otherwise a CTDB_REPLY_CONTROL packet is built for the node the request
  came from; its payload is the reply data (outdata may be NULL) followed
  by the error message (errormsg may be NULL, no terminating NUL is sent).
*/
void ctdb_request_control_reply(struct ctdb_context *ctdb, struct ctdb_req_control *c,
				TDB_DATA *outdata, int32_t status, const char *errormsg)
{
	struct ctdb_reply_control *reply;
	size_t payload_len;
	size_t pkt_len;

	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
		/* some controls send no reply */
		return;
	}

	payload_len = (outdata != NULL) ? outdata->dsize : 0;

	pkt_len = offsetof(struct ctdb_reply_control, data) + payload_len;
	if (errormsg != NULL) {
		pkt_len += strlen(errormsg);
	}

	reply = ctdb_transport_allocate(ctdb, ctdb, CTDB_REPLY_CONTROL, pkt_len,
					struct ctdb_reply_control);
	CTDB_NO_MEMORY_VOID(ctdb, reply);

	/* address the reply to the requester and echo its reqid */
	reply->hdr.destnode = c->hdr.srcnode;
	reply->hdr.reqid    = c->hdr.reqid;
	reply->status       = status;
	reply->datalen      = payload_len;

	if (payload_len != 0) {
		memcpy(&reply->data[0], outdata->dptr, payload_len);
	}
	if (errormsg != NULL) {
		/* the error text goes directly behind the reply data */
		reply->errorlen = strlen(errormsg);
		memcpy(&reply->data[reply->datalen], errormsg, reply->errorlen);
	}

	ctdb_queue_packet(ctdb, &reply->hdr);

	talloc_free(reply);
}
/*
called when a CTDB_REQ_CONTROL packet comes in
*/
void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
struct ctdb_req_control *c = (struct ctdb_req_control *)hdr;
TDB_DATA data, *outdata;
int32_t status;
bool async_reply = False;
const char *errormsg = NULL;
data.dptr = &c->data[0];
data.dsize = c->datalen;
outdata = talloc_zero(c, TDB_DATA);
status = ctdb_control_dispatch(ctdb, c, data, outdata, hdr->srcnode,
&errormsg, &async_reply);
if (!async_reply) {
ctdb_request_control_reply(ctdb, c, outdata, status, errormsg);
}
}
/*
  Handle an incoming CTDB_REPLY_CONTROL packet.

  The pending control is looked up through hdr->reqid and its callback is
  invoked with the status, the reply data and the error message (if any)
  carried in the packet.
*/
void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_reply_control *reply = (struct ctdb_reply_control *)hdr;
	struct ctdb_control_state *pending;
	const char *err_text = NULL;
	TDB_DATA payload;

	pending = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_control_state);
	if (pending == NULL) {
		DEBUG(0,("vnn %u Invalid reqid %u in ctdb_reply_control\n",
			 ctdb->vnn, hdr->reqid));
		return;
	}

	if (pending->reqid != hdr->reqid) {
		/* the lookup gave us a state, but not the one this reply is for */
		DEBUG(0, ("Dropped orphaned control reply with reqid:%u\n", hdr->reqid));
		return;
	}

	payload.dptr  = &reply->data[0];
	payload.dsize = reply->datalen;

	if (reply->errorlen != 0) {
		/* the error text follows the reply data and is not NUL
		   terminated inside the packet, so take a bounded copy */
		err_text = talloc_strndup(pending,
					  (char *)&reply->data[reply->datalen],
					  reply->errorlen);
	}

	/* make the state a child of the packet, so it goes away when the
	   packet is freed */
	talloc_steal(hdr, pending);

	pending->callback(ctdb, reply->status, payload, err_text, pending->private_data);
}
/*
  talloc destructor for a ctdb_control_state: give the reqid back so that
  a late reply can no longer be matched to this state
*/
static int ctdb_control_destructor(struct ctdb_control_state *state)
{
	struct ctdb_context *ctdb = state->ctdb;

	ctdb_reqid_remove(ctdb, state->reqid);

	return 0;
}
/*
  timed event handler for a control that got no reply in time: count the
  timeout, report status -1 with an error string to the callback and
  release the control state
*/
static void ctdb_control_timeout(struct event_context *ev, struct timed_event *te, 
				 struct timeval t, void *private_data)
{
	struct ctdb_control_state *ctl =
		talloc_get_type(private_data, struct ctdb_control_state);
	TALLOC_CTX *scratch = talloc_new(ev);

	ctl->ctdb->statistics.timeouts.control++;

	/* hang the state off the scratch context so that freeing the
	   scratch context below releases it once the callback has run */
	talloc_steal(scratch, ctl);

	ctl->callback(ctl->ctdb, -1, tdb_null,
		      "ctdb_control timed out",
		      ctl->private_data);

	talloc_free(scratch);
}
/*
  send a control message to a node

  ctdb         - daemon context
  destnode     - target vnn, or one of the CTDB_BROADCAST_* values
  srvid        - copied into the request
  opcode       - the control to run
  client_id    - copied into the request
  flags        - CTDB_CTRL_FLAG_* bits; broadcasts require CTDB_CTRL_FLAG_NOREPLY
  data         - payload of the request, copied into the packet
  callback     - invoked with the reply, with a timeout error, or
                 immediately when destnode is invalid or disconnected
                 (never invoked for NOREPLY requests)
  private_data - handed back to callback; also the talloc parent of the
                 request state when not NULL

  Returns 0 if the request was queued (or answered immediately through
  the callback), -1 on an invalid broadcast request or allocation failure.
*/
int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
			     uint64_t srvid, uint32_t opcode, uint32_t client_id,
			     uint32_t flags,
			     TDB_DATA data,
			     ctdb_control_callback_fn_t callback,
			     void *private_data)
{
	struct ctdb_req_control *c;
	struct ctdb_control_state *state;
	size_t len;
	bool is_broadcast = (destnode == CTDB_BROADCAST_VNNMAP ||
			     destnode == CTDB_BROADCAST_ALL ||
			     destnode == CTDB_BROADCAST_CONNECTED);
	bool want_reply = !(flags & CTDB_CTRL_FLAG_NOREPLY);

	if (is_broadcast && want_reply) {
		DEBUG(0,("Attempt to broadcast control without NOREPLY\n"));
		return -1;
	}

	if (!is_broadcast &&
	    (!ctdb_validate_vnn(ctdb, destnode) || 
	     (ctdb->nodes[destnode]->flags & NODE_FLAGS_DISCONNECTED))) {
		/* nothing can be sent; tell the caller right away if it
		   expects an answer */
		if (want_reply) {
			callback(ctdb, -1, tdb_null, "ctdb_control to disconnected node", private_data);
		}
		return 0;
	}

	/* the state is made a child of private_data if possible. This means any reply
	   will be discarded if the private_data goes away */
	state = talloc(private_data?private_data:ctdb, struct ctdb_control_state);
	CTDB_NO_MEMORY(ctdb, state);

	state->reqid = ctdb_reqid_new(ctdb, state);
	state->callback = callback;
	state->private_data = private_data;
	state->ctdb = ctdb;
	state->flags = flags;

	talloc_set_destructor(state, ctdb_control_destructor);

	len = offsetof(struct ctdb_req_control, data) + data.dsize;
	c = ctdb_transport_allocate(ctdb, state, CTDB_REQ_CONTROL, len, 
				    struct ctdb_req_control);
	if (c == NULL) {
		/* don't leak the state and its reqid (the destructor releases
		   the reqid); the macro below then reports the error and
		   returns -1 */
		talloc_free(state);
	}
	CTDB_NO_MEMORY(ctdb, c);
	talloc_set_name_const(c, "ctdb_req_control packet");

	c->hdr.destnode     = destnode;
	c->hdr.reqid        = state->reqid;
	c->opcode           = opcode;
	c->client_id        = client_id;
	c->flags            = flags;
	c->srvid            = srvid;
	c->datalen          = data.dsize;
	if (data.dsize) {
		memcpy(&c->data[0], data.dptr, data.dsize);
	}

	ctdb_queue_packet(ctdb, &c->hdr);

	if (!want_reply) {
		/* no reply will come, so no state is needed; this frees the
		   packet as well, it is a child of the state */
		talloc_free(state);
		return 0;
	}

	if (ctdb->tunable.control_timeout) {
		event_add_timed(ctdb->ev, state, 
				timeval_current_ofs(ctdb->tunable.control_timeout, 0), 
				ctdb_control_timeout, state);
	}

	talloc_free(c);

	return 0;
}

View File

@ -1,926 +0,0 @@
/*
ctdb daemon code
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "lib/events/events.h"
#include "lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb.h"
#include "../include/ctdb_private.h"
static void daemon_incoming_packet(void *, struct ctdb_req_header *);
/*
  handler for when a node changes its flags

  data must hold exactly one struct ctdb_node_flag_change. The flags of
  the named node are replaced by the received ones, except for the
  DISCONNECTED bit, which always keeps its locally known value. If the
  change concerns this node and leaves it inactive or banned, all public
  IPs are released.
*/
static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid, 
				TDB_DATA data, void *private_data)
{
	struct ctdb_node_flag_change *c = (struct ctdb_node_flag_change *)data.dptr;

	/* check the size first: c must not be looked at before we know
	   that the message really holds a complete structure */
	if (data.dsize != sizeof(*c)) {
		DEBUG(0,(__location__ " Invalid data in ctdb_node_flag_change\n"));
		return;
	}

	if (!ctdb_validate_vnn(ctdb, c->vnn)) {
		DEBUG(0,("Bad vnn %u in flag_change_handler\n", c->vnn));
		return;
	}

	/* don't get the disconnected flag from the other node */
	ctdb->nodes[c->vnn]->flags = 
		(ctdb->nodes[c->vnn]->flags&NODE_FLAGS_DISCONNECTED) 
		| (c->flags & ~NODE_FLAGS_DISCONNECTED);	
	DEBUG(2,("Node flags for node %u are now 0x%x\n", c->vnn, ctdb->nodes[c->vnn]->flags));

	/* make sure we don't hold any IPs when we shouldn't */
	if (c->vnn == ctdb->vnn &&
	    (ctdb->nodes[c->vnn]->flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_BANNED))) {
		ctdb_release_all_ips(ctdb);
	}
}
/*
  completion callback of the "startup" event script: bring up the
  transport, the recovery daemon, the node flag change handler and the
  monitoring of other nodes, in that order
*/
static void ctdb_start_transport(struct ctdb_context *ctdb, int status, void *p)
{
	int rc;

	if (status != 0) {
		DEBUG(0,("startup event failed!\n"));
		ctdb_fatal(ctdb, "startup event script failed");		
	}

	/* get the transport running */
	rc = ctdb->methods->start(ctdb);
	if (rc != 0) {
		DEBUG(0,("transport failed to start!\n"));
		ctdb_fatal(ctdb, "transport failed to start");
	}

	/* launch the recovery daemon process */
	rc = ctdb_start_recoverd(ctdb);
	if (rc != 0) {
		DEBUG(0,("Failed to start recovery daemon\n"));
		exit(11);
	}

	/* get told when nodes are disabled/enabled */
	ctdb_register_message_handler(ctdb, ctdb, CTDB_SRVID_NODE_FLAGS_CHANGED, 
				      flag_change_handler, NULL);

	/* begin watching for dead nodes */
	ctdb_start_monitoring(ctdb);
}
/*
  Set up the configured transport, announce our startup to the other
  nodes, kick off the "startup" event script and then run the event loop.
  Returns only when the early setup fails; once the event loop has been
  entered the function ends in exit(1).
*/
static void ctdb_main_loop(struct ctdb_context *ctdb)
{
	int ctdb_tcp_init(struct ctdb_context *);
#ifdef USE_INFINIBAND
	int ctdb_ibw_init(struct ctdb_context *);
#endif
	int rc = -1;

	/* pick the transport by name; rc stays -1 for an unknown name */
	if (strcmp(ctdb->transport, "tcp") == 0) {
		rc = ctdb_tcp_init(ctdb);
	}
#ifdef USE_INFINIBAND
	if (strcmp(ctdb->transport, "ib") == 0) {
		rc = ctdb_ibw_init(ctdb);
	}
#endif
	if (rc != 0) {
		DEBUG(0,("Failed to initialise transport '%s'\n", ctdb->transport));
		return;
	}

	/* let the transport initialise itself */
	rc = ctdb->methods->initialise(ctdb);
	if (rc != 0) {
		DEBUG(0,("transport failed to initialise!\n"));
		ctdb_fatal(ctdb, "transport failed to initialise");
	}

	/* let all other nodes know that we have just started up */
	ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL,
				 0, CTDB_CONTROL_STARTUP, 0,
				 CTDB_CTRL_FLAG_NOREPLY,
				 tdb_null, NULL, NULL);

	/* drop any IPs we still hold from previous runs of the daemon */
	ctdb_release_all_ips(ctdb);

	/* the rest of the startup happens in ctdb_start_transport() once the
	   event script has finished */
	rc = ctdb_event_script_callback(ctdb, timeval_zero(), ctdb, 
					ctdb_start_transport, NULL, "startup");
	if (rc != 0) {
		DEBUG(0,("Failed startup event script\n"));
		return;
	}

	/* go into a wait loop to allow other nodes to complete */
	event_loop_wait(ctdb->ev);

	DEBUG(0,("event_loop_wait() returned. this should not happen\n"));
	exit(1);
}
static void block_signal(int signum)
{
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = SIG_IGN;
sigemptyset(&act.sa_mask);
sigaddset(&act.sa_mask, signum);
sigaction(signum, &act, NULL);
}
/*
  queue a packet on the client's queue and count it in the statistics;
  returns the result of ctdb_queue_send()
*/
static int daemon_queue_send(struct ctdb_client *client, struct ctdb_req_header *hdr)
{
	uint8_t *raw = (uint8_t *)hdr;

	client->ctdb->statistics.client_packets_sent++;

	return ctdb_queue_send(client->queue, raw, hdr->length);
}
/*
  message handler used in daemon mode: wraps the received message data in
  a CTDB_REQ_MESSAGE packet and passes it on to the client that
  registered for the srvid (private_data is that client)
*/
static void daemon_message_handler(struct ctdb_context *ctdb, uint64_t srvid, 
				   TDB_DATA data, void *private_data)
{
	struct ctdb_client *client = talloc_get_type(private_data, struct ctdb_client);
	struct ctdb_req_message *msg;
	int pkt_len = offsetof(struct ctdb_req_message, data) + data.dsize;

	/* build the packet that carries the data to the client */
	msg = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE, 
				 pkt_len, struct ctdb_req_message);
	CTDB_NO_MEMORY_VOID(ctdb, msg);

	talloc_set_name_const(msg, "req_message packet");

	msg->datalen = data.dsize;
	msg->srvid   = srvid;
	memcpy(&msg->data[0], data.dptr, data.dsize);

	daemon_queue_send(client, &msg->hdr);

	talloc_free(msg);
}
/*
  called when the daemon received a request from a client to register
  a srvid: messages for that srvid are from then on forwarded to the
  client by daemon_message_handler().

  Returns -1 for an unknown client_id, otherwise the result of
  ctdb_register_message_handler().
*/
int daemon_register_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
{
	struct ctdb_client *client;
	int res;

	client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	if (client == NULL) {
		DEBUG(0,("Bad client_id in daemon_request_register_message_handler\n"));
		return -1;
	}

	res = ctdb_register_message_handler(ctdb, client, srvid, daemon_message_handler, client);
	if (res == 0) {
		DEBUG(2,(__location__ " Registered message handler for srvid=%llu\n", 
			 (unsigned long long)srvid));
	} else {
		DEBUG(0,(__location__ " Failed to register handler %llu in daemon\n", 
			 (unsigned long long)srvid));
	}

	/* this is a hack for Samba - a srvid that fits into 32 bits and names
	   an existing process is taken to be the pid of the Samba client */
	if ((srvid >> 32) == 0 &&
	    kill(srvid, 0) == 0) {
		client->pid = srvid;
		DEBUG(0,(__location__ " Registered PID %u for client %u\n",
			 (unsigned)client->pid, client_id));
	}

	return res;
}
/*
  called when the daemon received a request from a client to remove
  a srvid again.

  Returns -1 for an unknown client_id, otherwise the result of
  ctdb_deregister_message_handler().
*/
int daemon_deregister_message_handler(struct ctdb_context *ctdb, uint32_t client_id, uint64_t srvid)
{
	struct ctdb_client *client;

	client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	if (client != NULL) {
		return ctdb_deregister_message_handler(ctdb, srvid, client);
	}

	DEBUG(0,("Bad client_id in daemon_request_deregister_message_handler\n"));
	return -1;
}
/*
destroy a ctdb_client
*/
static int ctdb_client_destructor(struct ctdb_client *client)
{
ctdb_takeover_client_destructor_hook(client);
ctdb_reqid_remove(client->ctdb, client->client_id);
client->ctdb->statistics.num_clients--;
return 0;
}
/*
  called when the daemon received a ctdb request message from a local
  client over the unix domain socket: messages addressed to this node are
  processed locally, all others are sent on to the destination node
*/
static void daemon_request_message_from_client(struct ctdb_client *client, 
					       struct ctdb_req_message *c)
{
	struct ctdb_context *ctdb = client->ctdb;
	TDB_DATA payload;

	if (ctdb_get_vnn(ctdb) == c->hdr.destnode) {
		/* maybe the message is for another client on this node */
		ctdb_request_message(ctdb, (struct ctdb_req_header *)c);
		return;
	}

	/* otherwise it is for a remote node */
	payload.dptr  = &c->data[0];
	payload.dsize = c->datalen;

	if (ctdb_daemon_send_message(ctdb, c->hdr.destnode,
				     c->srvid, payload) != 0) {
		DEBUG(0,(__location__ " Failed to send message to remote node %u\n",
			 c->hdr.destnode));
	}
}
/*
  per-request state for a call that a local client asked the daemon to
  run; created in daemon_request_call_from_client() and consumed in
  daemon_call_from_client_callback()
*/
struct daemon_call_state {
/* the client that sent the request and gets the reply */
struct ctdb_client *client;
/* reqid of the client's request, echoed in the reply header */
uint32_t reqid;
/* the call being run; receives the reply data */
struct ctdb_call *call;
/* when the request was accepted; passed to ctdb_latency() on completion */
struct timeval start_time;
};
/*
complete a call from a client
*/
static void daemon_call_from_client_callback(struct ctdb_call_state *state)
{
struct daemon_call_state *dstate = talloc_get_type(state->async.private_data,
struct daemon_call_state);
struct ctdb_reply_call *r;
int res;
uint32_t length;
struct ctdb_client *client = dstate->client;
talloc_steal(client, dstate);
talloc_steal(dstate, dstate->call);
res = ctdb_daemon_call_recv(state, dstate->call);
if (res != 0) {
DEBUG(0, (__location__ " ctdbd_call_recv() returned error\n"));
client->ctdb->statistics.pending_calls--;
ctdb_latency(&client->ctdb->statistics.max_call_latency, dstate->start_time);
return;
}
length = offsetof(struct ctdb_reply_call, data) + dstate->call->reply_data.dsize;
r = ctdbd_allocate_pkt(client->ctdb, dstate, CTDB_REPLY_CALL,
length, struct ctdb_reply_call);
if (r == NULL) {
DEBUG(0, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
client->ctdb->statistics.pending_calls--;
ctdb_latency(&client->ctdb->statistics.max_call_latency, dstate->start_time);
return;
}
r->hdr.reqid = dstate->reqid;
r->datalen = dstate->call->reply_data.dsize;
memcpy(&r->data[0], dstate->call->reply_data.dptr, r->datalen);
res = daemon_queue_send(client, &r->hdr);
if (res != 0) {
DEBUG(0, (__location__ " Failed to queue packet from daemon to client\n"));
}
ctdb_latency(&client->ctdb->statistics.max_call_latency, dstate->start_time);
talloc_free(dstate);
client->ctdb->statistics.pending_calls--;
}
static void daemon_request_call_from_client(struct ctdb_client *client,
struct ctdb_req_call *c);
/*
  this is called when the ctdb daemon received a ctdb request call
  from a local client over the unix domain socket

  The record named in the request is locked and fetched. If this node is
  the dmaster of the record the call is run locally, otherwise it is sent
  to the dmaster. In both cases daemon_call_from_client_callback() is
  installed as the completion callback and sends the reply to the client.

  On every failure path the pending_calls statistic is decremented again
  and the per-request state is released; no reply is sent to the client.
*/
static void daemon_request_call_from_client(struct ctdb_client *client, 
					    struct ctdb_req_call *c)
{
	struct ctdb_call_state *state;
	struct ctdb_db_context *ctdb_db;
	struct daemon_call_state *dstate;
	struct ctdb_call *call;
	struct ctdb_ltdb_header header;
	TDB_DATA key, data;
	int ret;
	struct ctdb_context *ctdb = client->ctdb;

	ctdb->statistics.total_calls++;
	ctdb->statistics.pending_calls++;

	ctdb_db = find_ctdb_db(client->ctdb, c->db_id);
	if (!ctdb_db) {
		DEBUG(0, (__location__ " Unknown database in request. db_id==0x%08x\n",
			  c->db_id));
		ctdb->statistics.pending_calls--;
		return;
	}

	/* the payload is the key, immediately followed by the call data */
	key.dptr = c->data;
	key.dsize = c->keylen;

	ret = ctdb_ltdb_lock_fetch_requeue(ctdb_db, key, &header, 
					   (struct ctdb_req_header *)c, &data,
					   daemon_incoming_packet, client, True);
	if (ret == -2) {
		/* will retry later */
		ctdb->statistics.pending_calls--;
		return;
	}

	if (ret != 0) {
		DEBUG(0,(__location__ " Unable to fetch record\n"));
		ctdb->statistics.pending_calls--;
		return;
	}

	/* from here on the record is locked and has to be unlocked on
	   every path */
	dstate = talloc(client, struct daemon_call_state);
	if (dstate == NULL) {
		ctdb_ltdb_unlock(ctdb_db, key);
		DEBUG(0,(__location__ " Unable to allocate dstate\n"));
		ctdb->statistics.pending_calls--;
		return;
	}
	dstate->start_time = timeval_current();
	dstate->client = client;
	dstate->reqid  = c->hdr.reqid;
	/* the fetched record data is released together with dstate */
	talloc_steal(dstate, data.dptr);

	call = dstate->call = talloc_zero(dstate, struct ctdb_call);
	if (call == NULL) {
		ctdb_ltdb_unlock(ctdb_db, key);
		DEBUG(0,(__location__ " Unable to allocate call\n"));
		ctdb->statistics.pending_calls--;
		ctdb_latency(&ctdb->statistics.max_call_latency, dstate->start_time);
		/* the request is abandoned; don't keep its state around
		   until the client disconnects */
		talloc_free(dstate);
		return;
	}

	call->call_id = c->callid;
	call->key = key;
	call->call_data.dptr = c->data + c->keylen;
	call->call_data.dsize = c->calldatalen;
	call->flags = c->flags;

	if (header.dmaster == ctdb->vnn) {
		state = ctdb_call_local_send(ctdb_db, call, &header, &data);
	} else {
		state = ctdb_daemon_call_send_remote(ctdb_db, call, &header);
	}

	ctdb_ltdb_unlock(ctdb_db, key);

	if (state == NULL) {
		DEBUG(0,(__location__ " Unable to setup call send\n"));
		ctdb->statistics.pending_calls--;
		ctdb_latency(&ctdb->statistics.max_call_latency, dstate->start_time);
		talloc_free(dstate);
		return;
	}
	/* dstate lives as long as the call state, and the call state as
	   long as the client */
	talloc_steal(state, dstate);
	talloc_steal(client, state);

	state->async.fn = daemon_call_from_client_callback;
	state->async.private_data = dstate;
}
static void daemon_request_control_from_client(struct ctdb_client *client,
struct ctdb_req_control *c);
/*
  process one packet received from a client (p is the ctdb_client):
  the packet is checked for the ctdb magic and version and then handed
  to the handler for its operation
*/
static void daemon_incoming_packet(void *p, struct ctdb_req_header *hdr)
{
	struct ctdb_client *client = talloc_get_type(p, struct ctdb_client);
	struct ctdb_context *ctdb = client->ctdb;
	TALLOC_CTX *pkt_ctx;

	/* park the packet under a temporary context that is freed at the
	   end of this function. A handler that wants to keep the packet
	   steals it somewhere else, and the talloc_free() then no longer
	   affects it */
	pkt_ctx = talloc_new(client);
	talloc_steal(pkt_ctx, hdr);

	if (hdr->ctdb_magic != CTDB_MAGIC) {
		ctdb_set_error(client->ctdb, "Non CTDB packet rejected in daemon\n");
	} else if (hdr->ctdb_version != CTDB_VERSION) {
		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
	} else {
		switch (hdr->operation) {
		case CTDB_REQ_CALL:
			ctdb->statistics.client.req_call++;
			daemon_request_call_from_client(client, (struct ctdb_req_call *)hdr);
			break;

		case CTDB_REQ_MESSAGE:
			ctdb->statistics.client.req_message++;
			daemon_request_message_from_client(client, (struct ctdb_req_message *)hdr);
			break;

		case CTDB_REQ_CONTROL:
			ctdb->statistics.client.req_control++;
			daemon_request_control_from_client(client, (struct ctdb_req_control *)hdr);
			break;

		default:
			DEBUG(0,(__location__ " daemon: unrecognized operation %u\n",
				 hdr->operation));
		}
	}

	talloc_free(pkt_ctx);
}
/*
  called when the daemon gets an incoming packet from a client socket

  data - the raw packet. It is a talloc pointer (daemon_incoming_packet()
         reparents it with talloc_steal()) and this callback owns it:
         it is either handed on to daemon_incoming_packet() or freed
         here when the packet is rejected.
  cnt  - number of bytes in data; 0 means the client has gone away
  args - the struct ctdb_client the socket belongs to
*/
static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args)
{
	struct ctdb_client *client = talloc_get_type(args, struct ctdb_client);
	struct ctdb_req_header *hdr;

	if (cnt == 0) {
		/* connection closed - tear the client down */
		talloc_free(client);
		return;
	}

	client->ctdb->statistics.client_packets_recv++;

	if (cnt < sizeof(*hdr)) {
		ctdb_set_error(client->ctdb, "Bad packet length %u in daemon\n",
			       (unsigned)cnt);
		goto reject;
	}

	hdr = (struct ctdb_req_header *)data;
	if (cnt != hdr->length) {
		ctdb_set_error(client->ctdb, "Bad header length %u expected %u in daemon\n",
			       (unsigned)hdr->length, (unsigned)cnt);
		goto reject;
	}

	if (hdr->ctdb_magic != CTDB_MAGIC) {
		ctdb_set_error(client->ctdb, "Non CTDB packet rejected\n");
		goto reject;
	}

	if (hdr->ctdb_version != CTDB_VERSION) {
		ctdb_set_error(client->ctdb, "Bad CTDB version 0x%x rejected in daemon\n", hdr->ctdb_version);
		goto reject;
	}

	DEBUG(3,(__location__ " client request %u of type %u length %u from "
		 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
		 hdr->srcnode, hdr->destnode));

	/* it is the responsibility of the incoming packet function to free 'data' */
	daemon_incoming_packet(client, hdr);
	return;

reject:
	/* nobody else will ever see this packet, so release it here
	   instead of leaving it allocated for the life of its parent */
	talloc_free(data);
}
static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
struct sockaddr_in addr;
socklen_t len;
int fd;
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
struct ctdb_client *client;
memset(&addr, 0, sizeof(addr));
len = sizeof(addr);
fd = accept(ctdb->daemon.sd, (struct sockaddr *)&addr, &len);
if (fd == -1) {
return;
}
set_nonblocking(fd);
set_close_on_exec(fd);
client = talloc_zero(ctdb, struct ctdb_client);
client->ctdb = ctdb;
client->fd = fd;
client->client_id = ctdb_reqid_new(ctdb, client);
ctdb->statistics.num_clients++;
client->queue = ctdb_queue_setup(ctdb, client, fd, CTDB_DS_ALIGNMENT,
ctdb_daemon_read_cb, client);
talloc_set_destructor(client, ctdb_client_destructor);
}
/*
  create the daemon's unix domain socket, bind it to ctdb->daemon.name
  and start listening on it

  On success the descriptor is left in ctdb->daemon.sd and 0 is
  returned. On failure -1 is returned and ctdb->daemon.sd is -1.
*/
static int ux_socket_bind(struct ctdb_context *ctdb)
{
	struct sockaddr_un addr;

	ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (ctdb->daemon.sd == -1) {
		return -1;
	}

	set_nonblocking(ctdb->daemon.sd);
	set_close_on_exec(ctdb->daemon.sd);

#if 0
	/* AIX doesn't like this :( */
	if (fchown(ctdb->daemon.sd, geteuid(), getegid()) != 0 ||
	    fchmod(ctdb->daemon.sd, 0700) != 0) {
		DEBUG(0,("Unable to secure ctdb socket '%s'\n", ctdb->daemon.name));
		goto failed;
	}
#endif

	/* refuse names that do not fit: binding to a silently truncated
	   (and possibly unterminated) path would create the wrong socket */
	if (strlen(ctdb->daemon.name) >= sizeof(addr.sun_path)) {
		DEBUG(0,("ctdb socket name '%s' is too long\n", ctdb->daemon.name));
		goto failed;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/* the memset above plus copying at most size-1 bytes guarantees
	   NUL termination */
	strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path)-1);

	if (bind(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		DEBUG(0,("Unable to bind on ctdb socket '%s'\n", ctdb->daemon.name));
		goto failed;
	}

	if (listen(ctdb->daemon.sd, 10) != 0) {
		DEBUG(0,("Unable to listen on ctdb socket '%s'\n", ctdb->daemon.name));
		goto failed;
	}

	return 0;

failed:
	close(ctdb->daemon.sd);
	ctdb->daemon.sd = -1;
	return -1;
}
/*
  talloc destructor attached to a copy of the socket path: removes the
  socket file when that string is freed. The result of unlink() is
  deliberately ignored, and 0 is always returned so the free proceeds.
*/
static int unlink_destructor(const char *name)
{
	(void)unlink(name);

	return 0;
}
/*
  start the protocol going as a daemon

  Binds the unix domain socket, optionally forks into the background,
  takes the initial freeze and then runs the main loop.

  do_fork - when true the caller's process gets 0 back as soon as the
            child has been forked, and the child becomes the daemon

  Returns 0 in the parent after a successful fork (or when the main
  loop returns) and -1 if the fork itself fails. Failures to create
  the socket, get the initial freeze or register the listening socket
  terminate the process with exit codes 10, 12 and 13.
*/
int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork)
{
	int res;
	struct fd_event *fde;
	const char *domain_socket_name;

	/* get rid of any old sockets */
	unlink(ctdb->daemon.name);

	/* create a unix domain stream socket to listen to */
	res = ux_socket_bind(ctdb);
	if (res!=0) {
		DEBUG(0,(__location__ " Failed to open CTDB unix domain socket\n"));
		exit(10);
	}

	if (do_fork) {
		pid_t pid = fork();

		if (pid == -1) {
			/* previously indistinguishable from "I am the
			   parent": report it instead of claiming success */
			DEBUG(0,(__location__ " Failed to fork ctdb daemon\n"));
			return -1;
		}
		if (pid != 0) {
			/* parent: the child carries on as the daemon */
			return 0;
		}
	}

	tdb_reopen_all(False);

	if (do_fork) {
		setsid();
	}

	block_signal(SIGPIPE);

	/* try to set us up as realtime */
	ctdb_set_realtime(true);

	/* ensure the socket is deleted on exit of the daemon */
	domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
	talloc_set_destructor(domain_socket_name, unlink_destructor);

	ctdb->ev = s4_event_context_init(NULL);

	/* start frozen, then let the first election sort things out */
	if (!ctdb_blocking_freeze(ctdb)) {
		DEBUG(0,("Failed to get initial freeze\n"));
		exit(12);
	}

	/* force initial recovery for election */
	ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;

	/* now start accepting clients, only can do this once frozen */
	fde = event_add_fd(ctdb->ev, ctdb, ctdb->daemon.sd,
			   EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
			   ctdb_accept_client, ctdb);
	if (fde == NULL) {
		/* a daemon that can never accept a client is useless */
		DEBUG(0,(__location__ " Failed to listen for clients on ctdb socket\n"));
		exit(13);
	}

	ctdb_main_loop(ctdb);

	return 0;
}
/*
  allocate and pre-fill a packet header for daemon<->daemon traffic

  mem_ctx   - talloc parent handed to the transport's allocate_pkt()
  operation - stored in the header's operation field
  length    - requested packet length; raised to slength if smaller
  slength   - size of the fixed part of the packet; that many leading
              bytes are zeroed
  type      - talloc name given to the returned packet

  The buffer is requested with the length rounded up to a multiple of
  CTDB_DS_ALIGNMENT, but hdr->length records the unrounded length.
  Returns NULL if the transport cannot allocate the packet.
*/
struct ctdb_req_header *_ctdb_transport_allocate(struct ctdb_context *ctdb,
						 TALLOC_CTX *mem_ctx,
						 enum ctdb_operation operation,
						 size_t length, size_t slength,
						 const char *type)
{
	struct ctdb_req_header *pkt;
	int padded;

	/* never allocate less than the fixed structure */
	if (length < slength) {
		length = slength;
	}

	padded = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);

	pkt = (struct ctdb_req_header *)ctdb->methods->allocate_pkt(mem_ctx, padded);
	if (pkt == NULL) {
		DEBUG(0,("Unable to allocate transport packet for operation %u of length %u\n",
			 operation, (unsigned)length));
		return NULL;
	}

	talloc_set_name_const(pkt, type);
	memset(pkt, 0, slength);

	pkt->ctdb_magic   = CTDB_MAGIC;
	pkt->ctdb_version = CTDB_VERSION;
	pkt->operation    = operation;
	pkt->length       = length;
	pkt->srcnode      = ctdb->vnn;
	pkt->generation   = ctdb->vnn_map->generation;

	return pkt;
}
/*
  state kept for one control request issued by a local client, from the
  moment it is forwarded until the reply is sent or the request is
  cancelled
*/
struct daemon_control_state {
/* list links; the state is chained on its node's pending_controls list */
struct daemon_control_state *next, *prev;
/* the client connection that issued the control and gets the reply */
struct ctdb_client *client;
/* the request packet, reparented onto this state */
struct ctdb_req_control *c;
/* request id copied from the client's packet and echoed in the reply */
uint32_t reqid;
/* destination node whose pending list holds this state, or NULL when
   the destination is not a valid node number */
struct ctdb_node *node;
};
/*
  callback when a control reply comes in: build a CTDB_REPLY_CONTROL
  packet for the client that issued the control, queue it and release
  the request state

  status       - result code copied into the reply
  data         - reply payload; may be empty (callers pass tdb_null
                 when cancelling)
  errormsg     - optional error text appended after the payload, or NULL
  private_data - the struct daemon_control_state of the request
*/
static void daemon_control_callback(struct ctdb_context *ctdb,
				    int32_t status, TDB_DATA data,
				    const char *errormsg,
				    void *private_data)
{
	struct daemon_control_state *state = talloc_get_type(private_data,
							     struct daemon_control_state);
	struct ctdb_client *client = state->client;
	struct ctdb_reply_control *r;
	size_t errorlen = (errormsg != NULL) ? strlen(errormsg) : 0;
	size_t len;

	/* construct a message to send to the client containing the data */
	len = offsetof(struct ctdb_reply_control, data) + data.dsize + errorlen;

	r = ctdbd_allocate_pkt(ctdb, state, CTDB_REPLY_CONTROL, len,
			       struct ctdb_reply_control);
	CTDB_NO_MEMORY_VOID(ctdb, r);

	r->hdr.reqid = state->reqid;
	r->status    = status;
	r->datalen   = data.dsize;
	r->errorlen  = errorlen;

	/* an empty payload may come with a NULL pointer, which must not
	   be handed to memcpy() even for a zero length */
	if (data.dsize > 0) {
		memcpy(&r->data[0], data.dptr, data.dsize);
	}
	if (errorlen > 0) {
		/* the error text follows the payload, without a terminator */
		memcpy(&r->data[r->datalen], errormsg, errorlen);
	}

	daemon_queue_send(client, &r->hdr);

	/* the reply packet is a child of state and goes with it */
	talloc_free(state);
}
/*
  fail every control still pending on a node that has disconnected

  Each pending request is unlinked from the node's list and completed
  through daemon_control_callback() with status -1 and the error text
  "node is disconnected".
*/
void ctdb_daemon_cancel_controls(struct ctdb_context *ctdb, struct ctdb_node *node)
{
	for (;;) {
		struct daemon_control_state *pending = node->pending_controls;

		if (pending == NULL) {
			break;
		}

		DLIST_REMOVE(node->pending_controls, pending);
		daemon_control_callback(ctdb, (uint32_t)-1, tdb_null,
					"node is disconnected", pending);
	}
}
/*
destroy a daemon_control_state
*/
static int daemon_control_destructor(struct daemon_control_state *state)
{
if (state->node) {
DLIST_REMOVE(state->node->pending_controls, state);
}
return 0;
}
/*
  this is called when the ctdb daemon received a ctdb request control
  from a local client over the unix domain socket

  The request is recorded in a daemon_control_state (which takes
  ownership of the packet) and forwarded with
  ctdb_daemon_send_control(); the reply is delivered later through
  daemon_control_callback(). For CTDB_CTRL_FLAG_NOREPLY requests no
  reply will arrive, so the state is released as soon as the control
  has been sent.
*/
static void daemon_request_control_from_client(struct ctdb_client *client,
					       struct ctdb_req_control *c)
{
	TDB_DATA data;
	int res;
	struct daemon_control_state *state;
	TALLOC_CTX *tmp_ctx;

	if (c->hdr.destnode == CTDB_CURRENT_NODE) {
		c->hdr.destnode = client->ctdb->vnn;
	}

	state = talloc(client, struct daemon_control_state);
	CTDB_NO_MEMORY_VOID(client->ctdb, state);

	/* created only once the allocation above has succeeded, so that
	   the early return taken on allocation failure cannot strand it
	   on the client */
	tmp_ctx = talloc_new(client);

	state->client = client;
	state->c = talloc_steal(state, c);
	state->reqid = c->hdr.reqid;
	if (ctdb_validate_vnn(client->ctdb, c->hdr.destnode)) {
		state->node = client->ctdb->nodes[c->hdr.destnode];
		DLIST_ADD(state->node->pending_controls, state);
	} else {
		state->node = NULL;
	}

	talloc_set_destructor(state, daemon_control_destructor);

	if (c->flags & CTDB_CTRL_FLAG_NOREPLY) {
		/* nobody will complete this request; let it die with
		   tmp_ctx at the end of this function */
		talloc_steal(tmp_ctx, state);
	}

	data.dptr = &c->data[0];
	data.dsize = c->datalen;
	res = ctdb_daemon_send_control(client->ctdb, c->hdr.destnode,
				       c->srvid, c->opcode, client->client_id,
				       c->flags,
				       data, daemon_control_callback,
				       state);
	if (res != 0) {
		/* NOTE(review): a request that expects a reply stays
		   pending here and the client is never answered */
		DEBUG(0,(__location__ " Failed to send control to remote node %u\n",
			 c->hdr.destnode));
	}

	talloc_free(tmp_ctx);
}
/*
  register a call function

  Adds fn under call number id to the database identified by db_id.
  Returns 0 on success, -1 if the database is unknown or the
  registration record cannot be allocated.
*/
int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
			 ctdb_fn_t fn, int id)
{
	struct ctdb_registered_call *call;
	struct ctdb_db_context *ctdb_db;

	ctdb_db = find_ctdb_db(ctdb, db_id);
	if (ctdb_db == NULL) {
		return -1;
	}

	call = talloc(ctdb_db, struct ctdb_registered_call);
	/* the record was written to without a check before */
	CTDB_NO_MEMORY(ctdb, call);

	call->fn = fn;
	call->id = id;

	DLIST_ADD(ctdb_db->calls, call);
	return 0;
}
/*
this local messaging handler is ugly, but is needed to prevent
recursion in ctdb_send_message() when the destination node is the
same as the source node

one of these is allocated per deferred message and handed to
ctdb_local_message_trigger() as its private data
*/
struct ctdb_local_message {
/* daemon context used to dispatch the message */
struct ctdb_context *ctdb;
/* service id the message is addressed to */
uint64_t srvid;
/* message payload; dptr is a private copy owned by this structure */
TDB_DATA data;
};
/*
  timed event handler that delivers a message this node sent to itself

  private_data is the struct ctdb_local_message queued by
  ctdb_local_message(); it is freed once the dispatch has been
  attempted, whether or not it succeeded.
*/
static void ctdb_local_message_trigger(struct event_context *ev, struct timed_event *te,
				       struct timeval t, void *private_data)
{
	struct ctdb_local_message *m = talloc_get_type(private_data,
						       struct ctdb_local_message);

	if (ctdb_dispatch_message(m->ctdb, m->srvid, m->data) != 0) {
		DEBUG(0, (__location__ " Failed to dispatch message for srvid=%llu\n",
			  (unsigned long long)m->srvid));
	}

	talloc_free(m);
}
/*
  queue a message addressed to this node for delivery from the event
  loop

  The payload is copied, so the caller's buffer need not outlive the
  call. Returns 0 once the delivery has been scheduled and -1 if the
  message could not be copied or scheduled.
*/
static int ctdb_local_message(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data)
{
	struct ctdb_local_message *m;

	m = talloc(ctdb, struct ctdb_local_message);
	CTDB_NO_MEMORY(ctdb, m);

	m->ctdb = ctdb;
	m->srvid = srvid;
	m->data  = data;
	m->data.dptr = talloc_memdup(m, m->data.dptr, m->data.dsize);
	if (m->data.dptr == NULL) {
		talloc_free(m);
		return -1;
	}

	/* this needs to be done as an event to prevent recursion */
	if (event_add_timed(ctdb->ev, m, timeval_zero(),
			    ctdb_local_message_trigger, m) == NULL) {
		/* without the event the message would never be delivered
		   and m would never be freed, yet success was reported */
		talloc_free(m);
		return -1;
	}

	return 0;
}
/*
  send a ctdb message to the given node

  A message to our own node number is handed to ctdb_local_message();
  anything else is packed into a CTDB_REQ_MESSAGE packet and queued on
  the transport. Returns 0 on success, or the failure value of
  whichever helper failed.
*/
int ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t vnn,
			     uint64_t srvid, TDB_DATA data)
{
	struct ctdb_req_message *msg;
	int pkt_len;

	if (vnn == ctdb->vnn) {
		/* addressed to ourselves - no packet needed */
		return ctdb_local_message(ctdb, srvid, data);
	}

	pkt_len = offsetof(struct ctdb_req_message, data) + data.dsize;
	msg = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_MESSAGE, pkt_len,
				      struct ctdb_req_message);
	CTDB_NO_MEMORY(ctdb, msg);

	msg->datalen      = data.dsize;
	msg->srvid        = srvid;
	msg->hdr.destnode = vnn;
	memcpy(&msg->data[0], data.dptr, data.dsize);

	ctdb_queue_packet(ctdb, &msg->hdr);

	/* our copy is no longer needed once the packet has been queued */
	talloc_free(msg);
	return 0;
}

View File

@ -1,255 +0,0 @@
/*
ctdb freeze handling
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/util/dlinklist.h"
/*
  take tdb_lockall() on every attached database, in list order

  Returns 0 when all of them are locked. Returns -1 at the first
  failure; databases locked before that point are left locked.
*/
static int ctdb_lock_all_databases(struct ctdb_context *ctdb)
{
	struct ctdb_db_context *db = ctdb->db_list;

	while (db != NULL) {
		if (tdb_lockall(db->ltdb->tdb) != 0) {
			return -1;
		}
		db = db->next;
	}

	return 0;
}
/*
a list of control requests waiting for a freeze lock child to get
the database locks
*/
struct ctdb_freeze_waiter {
/* list links; waiters are chained on the freeze handle's waiters list */
struct ctdb_freeze_waiter *next, *prev;
/* daemon context, used to reach the freeze handle and to send the reply */
struct ctdb_context *ctdb;
/* the freeze control request that will be answered when this waiter is freed */
struct ctdb_req_control *c;
/* status sent in that reply; starts at -1 and is overwritten once the locks are held */
int32_t status;
};
/* a handle to a freeze lock child process */
struct ctdb_freeze_handle {
/* daemon context whose freeze_mode this handle drives */
struct ctdb_context *ctdb;
/* pid of the forked child that takes the database locks */
pid_t child;
/* parent's end of the socketpair shared with the child */
int fd;
/* freeze requests to answer once the child reports in */
struct ctdb_freeze_waiter *waiters;
};
/*
  talloc destructor for a freeze handle: mark the daemon as not
  frozen, then kill the lock-holding child and reap it. Always
  returns 0.
*/
static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
{
	pid_t lock_child = h->child;

	h->ctdb->freeze_mode = CTDB_FREEZE_NONE;

	kill(lock_child, SIGKILL);
	waitpid(lock_child, NULL, 0);

	return 0;
}
/*
called when the child writes its status to us
*/
static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
int32_t status;
struct ctdb_freeze_waiter *w;
if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
DEBUG(0,("freeze child died - unfreezing\n"));
talloc_free(h);
return;
}
if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
DEBUG(0,("read error from freeze lock child\n"));
status = -1;
}
if (status == -1) {
DEBUG(0,("Failed to get locks in ctdb_freeze_child\n"));
/* we didn't get the locks - destroy the handle */
talloc_free(h);
return;
}
h->ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
/* notify the waiters */
while ((w = h->ctdb->freeze_handle->waiters)) {
w->status = status;
DLIST_REMOVE(h->ctdb->freeze_handle->waiters, w);
talloc_free(w);
}
}
/*
  create a child which gets locks on all the open databases, then
  writes its status back to the parent

  The parent watches its end of a socketpair; ctdb_freeze_lock_handler()
  runs when the child reports in or exits. The child blocks in read()
  after reporting, so it goes away when the parent's end is closed.

  Returns the new handle, or NULL if the handle, the socketpair, the
  child or the fd event could not be created.
*/
static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb)
{
	struct ctdb_freeze_handle *h;
	int fd[2];
	struct fd_event *fde;

	h = talloc_zero(ctdb, struct ctdb_freeze_handle);
	if (h == NULL) {
		/* this function returns a pointer, so the bare-return
		   CTDB_NO_MEMORY_VOID helper cannot be used here */
		DEBUG(0,("Failed to allocate handle for ctdb_freeze_lock\n"));
		return NULL;
	}

	h->ctdb = ctdb;

	/* use socketpair() instead of pipe() so we have bi-directional fds */
	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) {
		DEBUG(0,("Failed to create pipe for ctdb_freeze_lock\n"));
		talloc_free(h);
		return NULL;
	}

	h->child = fork();
	if (h->child == -1) {
		DEBUG(0,("Failed to fork child for ctdb_freeze_lock\n"));
		/* no child exists to use them - don't leak the descriptors */
		close(fd[0]);
		close(fd[1]);
		talloc_free(h);
		return NULL;
	}

	if (h->child == 0) {
		int ret;
		/* in the child */
		close(fd[0]);
		ret = ctdb_lock_all_databases(ctdb);
		if (ret != 0) {
			/* exiting without writing makes the parent's
			   read come up short, which it treats as failure */
			_exit(0);
		}
		write(fd[1], &ret, sizeof(ret));
		/* the read here means we will die if the parent exits */
		read(fd[1], &ret, sizeof(ret));
		_exit(0);
	}

	/* from here on freeing h kills and reaps the child */
	talloc_set_destructor(h, ctdb_freeze_handle_destructor);

	close(fd[1]);

	h->fd = fd[0];

	fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
			   ctdb_freeze_lock_handler, h);
	if (fde == NULL) {
		DEBUG(0,("Failed to setup fd event for ctdb_freeze_lock\n"));
		close(fd[0]);
		talloc_free(h);
		return NULL;
	}

	return h;
}
/*
destroy a waiter for a freeze mode change
*/
static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
{
DLIST_REMOVE(w->ctdb->freeze_handle->waiters, w);
ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
return 0;
}
/*
  start the freeze process

  Does nothing if the databases are already frozen or a freeze lock
  child already exists. Otherwise creates the child and moves the
  daemon to CTDB_FREEZE_PENDING; the mode is left untouched if the
  child cannot be created.
*/
void ctdb_start_freeze(struct ctdb_context *ctdb)
{
	if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN || ctdb->freeze_handle) {
		/* already frozen, or a freeze is already under way */
		return;
	}

	ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
	CTDB_NO_MEMORY_VOID(ctdb, ctdb->freeze_handle);

	ctdb->freeze_mode = CTDB_FREEZE_PENDING;
}
/*
freeze the databases
*/
int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
{
struct ctdb_freeze_waiter *w;
if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
/* we're already frozen */
return 0;
}
ctdb_start_freeze(ctdb);
/* add ourselves to list of waiters */
w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
CTDB_NO_MEMORY(ctdb, w);
w->ctdb = ctdb;
w->c = talloc_steal(w, c);
w->status = -1;
talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
DLIST_ADD(ctdb->freeze_handle->waiters, w);
/* we won't reply till later */
*async_reply = True;
return 0;
}
/*
  block until we are frozen, used during daemon startup

  Starts a freeze and runs the event loop until the freeze is no
  longer pending. Returns true if the databases ended up frozen,
  false otherwise.
*/
bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
{
	ctdb_start_freeze(ctdb);

	for (;;) {
		if (ctdb->freeze_mode != CTDB_FREEZE_PENDING) {
			break;
		}
		/* let the freeze child's status event be processed */
		event_loop_once(ctdb->ev);
	}

	return ctdb->freeze_mode == CTDB_FREEZE_FROZEN;
}
/*
  thaw the databases

  Frees the freeze handle (its destructor kills the lock child and
  resets the freeze mode), forgets it, and then resends all calls
  through ctdb_call_resend_all(). Always returns 0.
*/
int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
{
	struct ctdb_freeze_handle *frozen = ctdb->freeze_handle;

	/* ctdb->freeze_handle stays set while the handle is torn down */
	talloc_free(frozen);
	ctdb->freeze_handle = NULL;

	ctdb_call_resend_all(ctdb);

	return 0;
}

View File

@ -1,164 +0,0 @@
/*
wait for a tdb chain lock
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../tdb/include/tdb.h"
#include "../include/ctdb_private.h"
/* state for one pending attempt to get a tdb chain lock via a child process */
struct lockwait_handle {
/* daemon context, used for statistics and flags */
struct ctdb_context *ctdb;
/* database whose chain is being locked */
struct ctdb_db_context *ctdb_db;
/* fd event watching the read end of the pipe */
struct fd_event *fde;
/* pipe between parent (fd[0], read end) and child (fd[1], write end) */
int fd[2];
/* pid of the child that takes the chain lock */
pid_t child;
/* argument passed to callback; also the talloc parent of this handle */
void *private_data;
/* function to run once the child reports that it holds the lock */
void (*callback)(void *);
/* record key whose chain is being locked */
TDB_DATA key;
/* when the wait began, used for the lockwait latency statistic */
struct timeval start_time;
};
/*
  fd event handler run when the lockwait child has written to the pipe,
  i.e. when it holds the chain lock

  private_data is the struct lockwait_handle. The user callback runs
  between tdb_chainlock_mark() and tdb_chainlock_unmark(), and only
  afterwards is the child killed and the handle freed.
*/
static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
struct lockwait_handle *h = talloc_get_type(private_data,
struct lockwait_handle);
/* copy out everything needed later into locals before the handle is
   reparented below */
void (*callback)(void *) = h->callback;
void *p = h->private_data;
pid_t child = h->child;
TDB_DATA key = h->key;
struct tdb_context *tdb = h->ctdb_db->ltdb->tdb;
TALLOC_CTX *tmp_ctx = talloc_new(ev);
/* private copy of the key bytes, owned by tmp_ctx */
key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
/* the accounting and the kill that lockwait_destructor() would do are
   done explicitly in this function, so disarm it first */
talloc_set_destructor(h, NULL);
ctdb_latency(&h->ctdb->statistics.max_lockwait_latency, h->start_time);
h->ctdb->statistics.pending_lockwait_calls--;
/* the handle needs to go away when the context is gone - when
the handle goes away this implicitly closes the pipe, which
kills the child holding the lock */
talloc_steal(tmp_ctx, h);
/* torture mode sanity check: the child should be holding the lock,
   so getting it ourselves here is treated as fatal */
if (h->ctdb->flags & CTDB_FLAG_TORTURE) {
if (tdb_chainlock_nonblock(tdb, key) == 0) {
ctdb_fatal(h->ctdb, "got chain lock while lockwait child active");
}
}
/* NOTE(review): presumably mark/unmark record the chain lock as held
   by this process while the child owns the real lock - confirm against
   the tdb API */
tdb_chainlock_mark(tdb, key);
callback(p);
tdb_chainlock_unmark(tdb, key);
/* the child has served its purpose: kill and reap it */
kill(child, SIGKILL);
waitpid(child, NULL, 0);
/* frees the handle and the key copy */
talloc_free(tmp_ctx);
}
/*
  talloc destructor for a lockwait handle that is freed before its
  lock arrives: drop it from the pending-lockwait count, then kill
  the child and reap it. Always returns 0.
*/
static int lockwait_destructor(struct lockwait_handle *h)
{
	pid_t lock_child = h->child;

	h->ctdb->statistics.pending_lockwait_calls--;

	kill(lock_child, SIGKILL);
	waitpid(lock_child, NULL, 0);

	return 0;
}
/*
  setup a non-blocking chainlock on a tdb record. If this function
  returns NULL then it could not get the chainlock. Otherwise it
  returns a opaque handle, and will call callback() once it has
  managed to get the chainlock. You can cancel it by using talloc_free
  on the returned handle.

  It is the callers responsibility to unlock the chainlock once
  acquired

  The work is done by a forked child, which takes the blocking chain
  lock and then writes one byte to a pipe; lockwait_handler() runs in
  the parent when that byte arrives. The handle is allocated as a
  talloc child of private_data.
*/
struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
				      TDB_DATA key,
				      void (*callback)(void *private_data),
				      void *private_data)
{
	struct lockwait_handle *result;
	int ret;
	pid_t parent = getpid();

	ctdb_db->ctdb->statistics.lockwait_calls++;
	ctdb_db->ctdb->statistics.pending_lockwait_calls++;

	if (!(result = talloc_zero(private_data, struct lockwait_handle))) {
		ctdb_db->ctdb->statistics.pending_lockwait_calls--;
		return NULL;
	}

	ret = pipe(result->fd);
	if (ret != 0) {
		talloc_free(result);
		ctdb_db->ctdb->statistics.pending_lockwait_calls--;
		return NULL;
	}

	result->child = fork();
	if (result->child == (pid_t)-1) {
		close(result->fd[0]);
		close(result->fd[1]);
		talloc_free(result);
		ctdb_db->ctdb->statistics.pending_lockwait_calls--;
		return NULL;
	}

	result->callback     = callback;
	result->private_data = private_data;
	result->ctdb         = ctdb_db->ctdb;
	result->ctdb_db      = ctdb_db;
	result->key          = key;

	if (result->child == 0) {
		char c = 0;
		close(result->fd[0]);
		tdb_chainlock(ctdb_db->ltdb->tdb, key);
		write(result->fd[1], &c, 1);
		/* make sure we die when our parent dies */
		while (kill(parent, 0) == 0 || errno != ESRCH) {
			sleep(5);
		}
		_exit(0);
	}

	close(result->fd[1]);

	/* from here on freeing the handle adjusts the pending count and
	   kills and reaps the child */
	talloc_set_destructor(result, lockwait_destructor);

	result->fde = event_add_fd(ctdb_db->ctdb->ev, result, result->fd[0],
				   EVENT_FD_READ|EVENT_FD_AUTOCLOSE, lockwait_handler,
				   (void *)result);
	if (result->fde == NULL) {
		/* no event owns the read end, so close it ourselves; the
		   destructor run by talloc_free() already decrements
		   pending_lockwait_calls, so it must not be decremented
		   a second time here */
		close(result->fd[0]);
		talloc_free(result);
		return NULL;
	}

	result->start_time = timeval_current();

	return result;
}

View File

@ -1,366 +0,0 @@
/*
ctdb ltdb code - server side
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "tdb_wrap.h"
#include "lib/util/dlinklist.h"
/*
  the "null" call that every database registers: it ignores its
  argument and reports success
*/
static int ctdb_null_func(struct ctdb_call_info *call)
{
	(void)call;

	return 0;
}
/*
  the plain "fetch" call that every database registers: the reply is
  the record itself, so reply_data is pointed at the call's own
  record_data. Always returns 0.
*/
static int ctdb_fetch_func(struct ctdb_call_info *call)
{
	/* no copy is made - the reply aliases the record data */
	call->reply_data = &call->record_data;

	return 0;
}
/* what is needed to resubmit a request once its chain lock has been obtained */
struct lock_fetch_state {
/* daemon context, used to read the current vnn map generation */
struct ctdb_context *ctdb;
/* packet receive function the request is handed back to */
void (*recv_pkt)(void *, struct ctdb_req_header *);
/* first argument for recv_pkt */
void *recv_context;
/* the deferred request packet */
struct ctdb_req_header *hdr;
/* vnn map generation at the time the request was deferred */
uint32_t generation;
/* when true the request is resubmitted even if the generation has changed */
bool ignore_generation;
};
/*
  called when we should retry the operation, i.e. once the chain lock
  for a deferred request has been obtained

  p is the struct lock_fetch_state. A request deferred under an older
  vnn map generation is dropped unless it asked for the generation to
  be ignored; otherwise the packet goes back to its receive function.
*/
static void lock_fetch_callback(void *p)
{
	struct lock_fetch_state *state = talloc_get_type(p, struct lock_fetch_state);
	bool check_generation = !state->ignore_generation;

	if (check_generation &&
	    state->ctdb->vnn_map->generation != state->generation) {
		DEBUG(0,("Discarding previous generation lockwait packet\n"));
		talloc_free(state->hdr);
		return;
	}

	state->recv_pkt(state->recv_context, state->hdr);
	DEBUG(2,(__location__ " PACKET REQUEUED\n"));
}
/*
  do a non-blocking ltdb_lock, deferring this ctdb request until we
  have the chainlock

  It does the following:
   1) tries to get the chainlock. If it succeeds, then it returns 0

   2) if it fails to get a chainlock immediately then it sets up a
      non-blocking chainlock via ctdb_lockwait, and when it gets the
      chainlock it re-submits this ctdb request to the main packet
      receive function

  This effectively queues all ctdb requests that cannot be
  immediately satisfied until it can get the lock. This means that
  the main ctdb daemon will not block waiting for a chainlock held by
  a client

  There are 3 possible return values:

     0:  means that it got the lock immediately.
    -1:  means that it failed to get the lock, and won't retry
    -2:  means that it failed to get the lock immediately, but will retry

  The chain lock is held on return only in the 0 case.
*/
int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db,
			   TDB_DATA key, struct ctdb_req_header *hdr,
			   void (*recv_pkt)(void *, struct ctdb_req_header *),
			   void *recv_context, bool ignore_generation)
{
	int ret;
	struct tdb_context *tdb = ctdb_db->ltdb->tdb;
	struct lockwait_handle *h;
	struct lock_fetch_state *state;

	ret = tdb_chainlock_nonblock(tdb, key);

	if (ret != 0 &&
	    !(errno == EACCES || errno == EAGAIN || errno == EDEADLK)) {
		/* a hard failure - don't try again */
		return -1;
	}

	/* when torturing, ensure we test the contended path. Only give
	   the lock back if we actually got it - unlocking after a
	   contended attempt would release a lock we do not hold */
	if (ret == 0 &&
	    (ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
	    random() % 5 == 0) {
		tdb_chainunlock(tdb, key);
		ret = -1;
	}

	/* first the non-contended path */
	if (ret == 0) {
		return 0;
	}

	state = talloc(hdr, struct lock_fetch_state);
	if (state == NULL) {
		DEBUG(0,(__location__ " Unable to allocate lock_fetch_state\n"));
		return -1;
	}
	state->ctdb              = ctdb_db->ctdb;
	state->hdr               = hdr;
	state->recv_pkt          = recv_pkt;
	state->recv_context      = recv_context;
	state->generation        = ctdb_db->ctdb->vnn_map->generation;
	state->ignore_generation = ignore_generation;

	/* now the contended path */
	h = ctdb_lockwait(ctdb_db, key, lock_fetch_callback, state);
	if (h == NULL) {
		/* the chain lock is not held on this path, so there is
		   nothing to unlock; state goes away with hdr */
		return -1;
	}

	/* we need to move the packet off the temporary context in ctdb_input_pkt(),
	   so it won't be freed yet */
	talloc_steal(state, hdr);
	talloc_steal(state, h);

	/* now tell the caller than we will retry asynchronously */
	return -2;
}
/*
  a variant of ctdb_ltdb_lock_requeue that also fetches the record

  When the lock is obtained immediately the record is read into
  header and data. If that read fails the lock is released again and
  the fetch's error is returned. Any non-zero result of the lock step
  is passed straight back to the caller.
*/
int ctdb_ltdb_lock_fetch_requeue(struct ctdb_db_context *ctdb_db,
				 TDB_DATA key, struct ctdb_ltdb_header *header,
				 struct ctdb_req_header *hdr, TDB_DATA *data,
				 void (*recv_pkt)(void *, struct ctdb_req_header *),
				 void *recv_context, bool ignore_generation)
{
	int rc = ctdb_ltdb_lock_requeue(ctdb_db, key, hdr, recv_pkt,
					recv_context, ignore_generation);

	if (rc != 0) {
		/* failed or deferred - there is nothing to fetch yet */
		return rc;
	}

	rc = ctdb_ltdb_fetch(ctdb_db, key, header, hdr, data);
	if (rc != 0) {
		ctdb_ltdb_unlock(ctdb_db, key);
	}

	return rc;
}
/*
  paranoid check to see if the db is empty

  Any non-zero result from the read traverse is logged and then
  treated as fatal.
*/
static void ctdb_check_db_empty(struct ctdb_db_context *ctdb_db)
{
	int count = tdb_traverse_read(ctdb_db->ltdb->tdb, NULL, NULL);

	if (count == 0) {
		return;
	}

	DEBUG(0,(__location__ " tdb '%s' not empty on attach! aborting\n",
		 ctdb_db->db_path));
	ctdb_fatal(ctdb_db->ctdb, "database not empty on attach");
}
/*
  a client has asked to attach a new database

  indata  - the database name; it is used as a C string and hashed to
            form the database id
            NOTE(review): nothing here checks that the name is NUL
            terminated within indata.dsize - confirm the callers
            guarantee it
  outdata - on success, points at the database id stored inside the
            database context (no copy is made); left untouched on
            failure

  Attaching to a name that is already attached is not an error and
  returns the existing id. Returns 0 on success, -1 on failure.
*/
int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
			       TDB_DATA *outdata)
{
	const char *db_name = (const char *)indata.dptr;
	struct ctdb_db_context *ctdb_db, *tmp_db;
	int ret;

	/* see if we already have this name */
	for (tmp_db=ctdb->db_list;tmp_db;tmp_db=tmp_db->next) {
		if (strcmp(db_name, tmp_db->db_name) == 0) {
			/* this is not an error */
			outdata->dptr  = (uint8_t *)&tmp_db->db_id;
			outdata->dsize = sizeof(tmp_db->db_id);
			return 0;
		}
	}

	ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
	CTDB_NO_MEMORY(ctdb, ctdb_db);

	ctdb_db->ctdb = ctdb;
	ctdb_db->db_name = talloc_strdup(ctdb_db, db_name);
	if (ctdb_db->db_name == NULL) {
		DEBUG(0,(__location__ " Out of memory copying database name\n"));
		goto failed;
	}

	ctdb_db->db_id = ctdb_hash(&indata);

	/* check for hash collisions */
	for (tmp_db=ctdb->db_list;tmp_db;tmp_db=tmp_db->next) {
		if (tmp_db->db_id == ctdb_db->db_id) {
			DEBUG(0,("db_id 0x%x hash collision. name1='%s' name2='%s'\n",
				 tmp_db->db_id, db_name, tmp_db->db_name));
			goto failed;
		}
	}

	if (ctdb->db_directory == NULL) {
		ctdb->db_directory = VARDIR "/ctdb";
	}

	/* make sure the db directory exists */
	if (mkdir(ctdb->db_directory, 0700) == -1 && errno != EEXIST) {
		DEBUG(0,(__location__ " Unable to create ctdb directory '%s'\n",
			 ctdb->db_directory));
		goto failed;
	}

	/* open the database */
	ctdb_db->db_path = talloc_asprintf(ctdb_db, "%s/%s.%u",
					   ctdb->db_directory,
					   db_name, ctdb->vnn);
	if (ctdb_db->db_path == NULL) {
		DEBUG(0,(__location__ " Out of memory building database path\n"));
		goto failed;
	}

	/* NOTE(review): the tdb wrapper is parented on ctdb, not on
	   ctdb_db, so it survives the failure paths below - confirm
	   whether that is intended */
	ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path,
				      ctdb->tunable.database_hash_size,
				      TDB_CLEAR_IF_FIRST, O_CREAT|O_RDWR, 0666);
	if (ctdb_db->ltdb == NULL) {
		DEBUG(0,("Failed to open tdb '%s'\n", ctdb_db->db_path));
		goto failed;
	}

	ctdb_check_db_empty(ctdb_db);

	DLIST_ADD(ctdb->db_list, ctdb_db);

	/*
	   all databases support the "null" function. we need this in
	   order to do forced migration of records
	*/
	ret = ctdb_daemon_set_call(ctdb, ctdb_db->db_id, ctdb_null_func, CTDB_NULL_FUNC);
	if (ret != 0) {
		DEBUG(0,("Failed to setup null function for '%s'\n", ctdb_db->db_name));
		goto failed_unlink;
	}

	/*
	   all databases support the "fetch" function. we need this
	   for efficient Samba3 ctdb fetch
	*/
	ret = ctdb_daemon_set_call(ctdb, ctdb_db->db_id, ctdb_fetch_func, CTDB_FETCH_FUNC);
	if (ret != 0) {
		DEBUG(0,("Failed to setup fetch function for '%s'\n", ctdb_db->db_name));
		goto failed_unlink;
	}

	/* only publish the id once nothing can fail any more, so a failed
	   attach never leaves outdata pointing into freed memory */
	outdata->dptr  = (uint8_t *)&ctdb_db->db_id;
	outdata->dsize = sizeof(ctdb_db->db_id);

	/* tell all the other nodes about this database */
	ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_ALL, 0,
				 CTDB_CONTROL_DB_ATTACH, 0, CTDB_CTRL_FLAG_NOREPLY,
				 indata, NULL, NULL);

	DEBUG(1,("Attached to database '%s'\n", ctdb_db->db_path));

	/* success */
	return 0;

failed_unlink:
	/* the context is already on the database list; take it off
	   before freeing it or the list would point at freed memory */
	DLIST_REMOVE(ctdb->db_list, ctdb_db);
failed:
	talloc_free(ctdb_db);
	return -1;
}
/*
  called when a broadcast seqnum update comes in

  Updates that originate from this node are ignored. Otherwise the
  local tdb's sequence number is bumped and cached in the database
  context. Returns 0 on success, -1 if db_id is not attached.
*/
int32_t ctdb_ltdb_update_seqnum(struct ctdb_context *ctdb, uint32_t db_id, uint32_t srcnode)
{
	struct ctdb_db_context *db;
	struct tdb_context *tdb;

	if (srcnode == ctdb->vnn) {
		/* don't update ourselves! */
		return 0;
	}

	db = find_ctdb_db(ctdb, db_id);
	if (db == NULL) {
		DEBUG(0,("Unknown db_id 0x%x in ctdb_ltdb_update_seqnum\n", db_id));
		return -1;
	}

	tdb = db->ltdb->tdb;
	tdb_increment_seqnum_nonblock(tdb);
	db->seqnum = tdb_get_seqnum(tdb);

	return 0;
}
/*
  timer to check for seqnum changes in a ltdb and propagate them

  p is the struct ctdb_db_context being watched. If the tdb's sequence
  number differs from the cached one, an UPDATE_SEQNUM control carrying
  the database id is broadcast. The cached value is then refreshed and
  the timer re-armed for tunable.seqnum_frequency seconds from now.
*/
static void ctdb_ltdb_seqnum_check(struct event_context *ev, struct timed_event *te,
				   struct timeval t, void *p)
{
	struct ctdb_db_context *ctdb_db = talloc_get_type(p, struct ctdb_db_context);
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	uint32_t latest = tdb_get_seqnum(ctdb_db->ltdb->tdb);

	if (latest != ctdb_db->seqnum) {
		/* something has changed - tell the other nodes which db */
		TDB_DATA payload;

		payload.dsize = sizeof(uint32_t);
		payload.dptr  = (uint8_t *)&ctdb_db->db_id;
		ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
					 CTDB_CONTROL_UPDATE_SEQNUM, 0, CTDB_CTRL_FLAG_NOREPLY,
					 payload, NULL, NULL);
	}

	ctdb_db->seqnum = latest;

	/* setup a new timer */
	ctdb_db->te = event_add_timed(ctdb->ev, ctdb_db,
				      timeval_current_ofs(ctdb->tunable.seqnum_frequency, 0),
				      ctdb_ltdb_seqnum_check, ctdb_db);
}
/*
  enable seqnum handling on this db

  Arms the periodic seqnum check timer if it is not already running,
  turns on sequence numbers in the tdb and caches the current value.
  Returns 0 on success, -1 if db_id is not attached.
*/
int32_t ctdb_ltdb_enable_seqnum(struct ctdb_context *ctdb, uint32_t db_id)
{
	struct ctdb_db_context *db = find_ctdb_db(ctdb, db_id);

	if (db == NULL) {
		DEBUG(0,("Unknown db_id 0x%x in ctdb_ltdb_enable_seqnum\n", db_id));
		return -1;
	}

	if (!db->te) {
		/* first enable on this db - start the periodic check */
		db->te = event_add_timed(ctdb->ev, db,
					 timeval_current_ofs(ctdb->tunable.seqnum_frequency, 0),
					 ctdb_ltdb_seqnum_check, db);
	}

	tdb_enable_seqnum(db->ltdb->tdb);
	db->seqnum = tdb_get_seqnum(db->ltdb->tdb);

	return 0;
}

View File

@ -1,227 +0,0 @@
/*
monitoring links to all other nodes to detect dead nodes
Copyright (C) Ronnie Sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
/*
  periodic keepalive timer: detect peers that have stopped talking to us

  For every connected peer we count consecutive intervals in which
  nothing was received; when that count reaches the keepalive_limit
  tunable the peer is declared dead. A keepalive is sent to any peer we
  did not transmit to during the interval. The timer always re-arms
  itself, even while monitoring is disabled (no node is examined then).
*/
static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te,
				      struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
	int idx;

	if (ctdb->monitoring_mode != CTDB_MONITORING_DISABLED) {
		/* send a keepalive to all other nodes, unless we already
		   sent them something during this interval */
		for (idx = 0; idx < ctdb->num_nodes; idx++) {
			struct ctdb_node *peer = ctdb->nodes[idx];

			/* never monitor ourselves */
			if (peer->vnn == ctdb->vnn) {
				continue;
			}

			if (peer->flags & NODE_FLAGS_DISCONNECTED) {
				/* it might have come alive again */
				if (peer->rx_cnt != 0) {
					ctdb_node_connected(peer);
				}
				continue;
			}

			/* track how many intervals in a row were silent */
			if (peer->rx_cnt != 0) {
				peer->dead_count = 0;
			} else {
				peer->dead_count++;
			}
			peer->rx_cnt = 0;

			if (peer->dead_count >= ctdb->tunable.keepalive_limit) {
				DEBUG(0,("dead count reached for node %u\n", peer->vnn));
				ctdb_node_dead(peer);
				ctdb_send_keepalive(ctdb, peer->vnn);
				/* maybe tell the transport layer to kill the
				   sockets as well?
				*/
				continue;
			}

			if (peer->tx_cnt == 0) {
				DEBUG(5,("sending keepalive to %u\n", peer->vnn));
				ctdb_send_keepalive(ctdb, peer->vnn);
			}
			peer->tx_cnt = 0;
		}
	}

	/* schedule the next pass */
	event_add_timed(ctdb->ev, ctdb->monitor_context,
			timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
			ctdb_check_for_dead_nodes, ctdb);
}
static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data);
/*
called when a health monitoring event script finishes
*/
static void ctdb_health_callback(struct ctdb_context *ctdb, int status, void *p)
{
struct ctdb_node *node = ctdb->nodes[ctdb->vnn];
TDB_DATA data;
struct ctdb_node_flag_change c;
event_add_timed(ctdb->ev, ctdb->monitor_context,
timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
ctdb_check_health, ctdb);
if (status != 0 && !(node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(0,("monitor event failed - disabling node\n"));
node->flags |= NODE_FLAGS_UNHEALTHY;
} else if (status == 0 && (node->flags & NODE_FLAGS_UNHEALTHY)) {
DEBUG(0,("monitor event OK - node re-enabled\n"));
ctdb->nodes[ctdb->vnn]->flags &= ~NODE_FLAGS_UNHEALTHY;
} else {
/* no change */
return;
}
c.vnn = ctdb->vnn;
c.flags = node->flags;
data.dptr = (uint8_t *)&c;
data.dsize = sizeof(c);
/* tell the other nodes that something has changed */
ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
CTDB_SRVID_NODE_FLAGS_CHANGED, data);
}
/*
  periodic timer: ask the event scripts whether this node is healthy

  When monitoring is enabled the "monitor" event script is launched and
  ctdb_health_callback() schedules the next check when it completes. If
  monitoring is disabled, or the script cannot be launched, the timer is
  re-armed here instead.
*/
static void ctdb_check_health(struct event_context *ev, struct timed_event *te,
			      struct timeval t, void *private_data)
{
	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);

	if (ctdb->monitoring_mode != CTDB_MONITORING_DISABLED) {
		int ret = ctdb_event_script_callback(ctdb,
						     timeval_current_ofs(ctdb->tunable.script_timeout, 0),
						     ctdb->monitor_context, ctdb_health_callback, ctdb, "monitor");
		if (ret == 0) {
			/* the completion callback takes over from here */
			return;
		}
		DEBUG(0,("Unable to launch monitor event script\n"));
	}

	event_add_timed(ctdb->ev, ctdb->monitor_context,
			timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
			ctdb_check_health, ctdb);
}
/* stop any monitoring */
void ctdb_stop_monitoring(struct ctdb_context *ctdb)
{
talloc_free(ctdb->monitor_context);
ctdb->monitor_context = talloc_new(ctdb);
CTDB_NO_MEMORY_FATAL(ctdb, ctdb->monitor_context);
}
/*
  (re)start monitoring

  Any previous monitoring state is discarded first, then two timers are
  armed under the fresh monitor context: the keepalive / dead node check
  and the event script health check. Failing to arm either is fatal.
*/
void ctdb_start_monitoring(struct ctdb_context *ctdb)
{
	struct timed_event *te;

	ctdb_stop_monitoring(ctdb);

	/* watch for nodes that might be dead */
	te = event_add_timed(ctdb->ev, ctdb->monitor_context,
			     timeval_current_ofs(ctdb->tunable.keepalive_interval, 0),
			     ctdb_check_for_dead_nodes, ctdb);
	CTDB_NO_MEMORY_FATAL(ctdb, te);

	/* watch our own health via the event scripts */
	te = event_add_timed(ctdb->ev, ctdb->monitor_context,
			     timeval_current_ofs(ctdb->tunable.monitor_interval, 0),
			     ctdb_check_health, ctdb);
	CTDB_NO_MEMORY_FATAL(ctdb, te);
}
/*
  modify flags on the local node

  indata carries a struct ctdb_node_modflags with the bits to set and the
  bits to clear. If the resulting flags differ from the old ones the
  change is broadcast to all connected nodes, and a node that has just
  become banned is frozen and put into recovery mode.

  always returns 0
*/
int32_t ctdb_control_modflags(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_node_modflags *req = (struct ctdb_node_modflags *)indata.dptr;
	struct ctdb_node *self = ctdb->nodes[ctdb->vnn];
	uint32_t previous = self->flags;
	struct ctdb_node_flag_change change;
	TDB_DATA payload;

	self->flags |= req->set;
	self->flags &= ~req->clear;

	if (self->flags == previous) {
		/* no change */
		return 0;
	}

	DEBUG(0, ("Control modflags on node %u - flags now 0x%x\n", ctdb->vnn, self->flags));

	change.vnn = ctdb->vnn;
	change.flags = self->flags;

	payload.dptr = (uint8_t *)&change;
	payload.dsize = sizeof(change);

	/* tell the other nodes that something has changed */
	ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
				 CTDB_SRVID_NODE_FLAGS_CHANGED, payload);

	/* if we have been banned, go into recovery mode */
	if (!(previous & NODE_FLAGS_BANNED) && (self->flags & NODE_FLAGS_BANNED)) {
		/* make sure we are frozen */
		DEBUG(0,("This node has been banned - forcing freeze and recovery\n"));
		ctdb_start_freeze(ctdb);
		ctdb_release_all_ips(ctdb);
		ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
	}

	return 0;
}

View File

@ -1,679 +0,0 @@
/*
ctdb recovery code
Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie Sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/util/dlinklist.h"
/*
lock all databases - mark only
*/
static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb)
{
struct ctdb_db_context *ctdb_db;
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(0,("Attempt to mark all databases locked when not frozen\n"));
return -1;
}
for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) {
return -1;
}
}
return 0;
}
/*
lock all databases - unmark only
*/
static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb)
{
struct ctdb_db_context *ctdb_db;
if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
DEBUG(0,("Attempt to unmark all databases locked when not frozen\n"));
return -1;
}
for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) {
return -1;
}
}
return 0;
}
/*
  return the current vnn map in wire format

  Expects no input data. On success outdata holds a freshly allocated
  struct ctdb_vnn_map_wire (generation, size, then size uint32_t
  entries) parented to outdata.

  returns 0 on success, -1 on bad input size or out of memory
*/
int
ctdb_control_getvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_vnn_map_wire *map;
	size_t len;

	CHECK_CONTROL_DATA_SIZE(0);

	len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*ctdb->vnn_map->size;
	map = talloc_size(outdata, len);
	/* this handler returns int, so use the same out-of-memory macro
	   as the other int-returning control handlers in this file */
	CTDB_NO_MEMORY(ctdb, map);

	map->generation = ctdb->vnn_map->generation;
	map->size = ctdb->vnn_map->size;
	memcpy(map->map, ctdb->vnn_map->map, sizeof(uint32_t)*map->size);

	outdata->dsize = len;
	outdata->dptr = (uint8_t *)map;

	return 0;
}
/*
  replace the vnn map with the one supplied in wire format

  Only allowed while frozen. The incoming blob must be large enough to
  hold the wire header plus the number of entries it claims to contain.
  The replacement map is built completely before the current one is
  released, so a failure leaves ctdb->vnn_map untouched.

  returns 0 on success, -1 if not frozen, on malformed input or out of
  memory
*/
int
ctdb_control_setvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_vnn_map_wire *map = (struct ctdb_vnn_map_wire *)indata.dptr;
	const size_t hdr_len = offsetof(struct ctdb_vnn_map_wire, map);
	struct ctdb_vnn_map *new_map;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("Attempt to set vnnmap when not frozen\n"));
		return -1;
	}

	/* map->size is only trusted once we know the header is present,
	   and then only if that many entries really were sent */
	if (indata.dsize < hdr_len ||
	    (indata.dsize - hdr_len) / sizeof(uint32_t) < map->size) {
		DEBUG(0,(__location__ " Invalid data size in setvnnmap\n"));
		return -1;
	}

	new_map = talloc(ctdb, struct ctdb_vnn_map);
	CTDB_NO_MEMORY(ctdb, new_map);

	new_map->generation = map->generation;
	new_map->size = map->size;
	new_map->map = talloc_array(new_map, uint32_t, map->size);
	if (new_map->map == NULL) {
		DEBUG(0,(__location__ " Out of memory allocating vnn map\n"));
		talloc_free(new_map);
		return -1;
	}
	memcpy(new_map->map, map->map, sizeof(uint32_t)*map->size);

	/* everything succeeded - now it is safe to drop the old map */
	talloc_free(ctdb->vnn_map);
	ctdb->vnn_map = new_map;

	return 0;
}
/*
  return the ids of all attached databases

  Expects no input data. outdata receives a zero-initialised
  struct ctdb_dbid_map with one dbid per entry of ctdb->db_list.
  Exits the process if the reply cannot be allocated.
*/
int
ctdb_control_getdbmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_dbid_map *dbid_map;
	struct ctdb_db_context *db;
	uint32_t count = 0;
	uint32_t slot = 0;

	CHECK_CONTROL_DATA_SIZE(0);

	/* first pass: how many databases are there? */
	for (db = ctdb->db_list; db != NULL; db = db->next) {
		count++;
	}

	outdata->dsize = offsetof(struct ctdb_dbid_map, dbids) + 4*count;
	outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize);
	if (outdata->dptr == NULL) {
		DEBUG(0, (__location__ " Failed to allocate dbmap array\n"));
		exit(1);
	}

	dbid_map = (struct ctdb_dbid_map *)outdata->dptr;
	dbid_map->num = count;

	/* second pass: fill in the ids in list order */
	for (db = ctdb->db_list; db != NULL; db = db->next) {
		dbid_map->dbids[slot] = db->db_id;
		slot++;
	}

	return 0;
}
/*
  return address, vnn and flags for every configured node

  Expects no input data. outdata receives a zero-initialised
  struct ctdb_node_map with ctdb->num_nodes entries. Exits the process
  if the reply cannot be allocated.
*/
int
ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_node_map *node_map;
	uint32_t num_nodes;
	uint32_t idx;

	CHECK_CONTROL_DATA_SIZE(0);

	num_nodes = ctdb->num_nodes;

	outdata->dsize = offsetof(struct ctdb_node_map, nodes) + num_nodes*sizeof(struct ctdb_node_and_flags);
	outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize);
	if (outdata->dptr == NULL) {
		DEBUG(0, (__location__ " Failed to allocate nodemap array\n"));
		exit(1);
	}

	node_map = (struct ctdb_node_map *)outdata->dptr;
	node_map->num = num_nodes;

	for (idx = 0; idx < num_nodes; idx++) {
		struct ctdb_node *src = ctdb->nodes[idx];
		struct ctdb_node_and_flags *dst = &node_map->nodes[idx];

		/* the textual node address is converted to binary form */
		inet_aton(src->address.address, &dst->sin.sin_addr);
		dst->vnn = src->vnn;
		dst->flags = src->flags;
	}

	return 0;
}
/*
  state handed to the traverse_getkeys() callback while collecting the
  records of one database
*/
struct getkeys_params {
/* daemon context, needed to compute the lmaster of each key */
struct ctdb_context *ctdb;
/* only records with this lmaster are collected; CTDB_LMASTER_ANY
   matches every record */
uint32_t lmaster;
/* number of valid entries in recs */
uint32_t rec_count;
/* collected records; key and data are copies owned by the recs array */
struct getkeys_rec {
TDB_DATA key;
TDB_DATA data;
} *recs;
};
/*
  tdb traverse callback: collect a copy of every record whose lmaster
  matches the one requested in the getkeys_params

  On allocation failure everything collected so far is released,
  params->recs is set to NULL and the traverse is aborted, so the caller
  can tell that the result is unusable.

  returns 0 to continue the traverse, -1 to abort it
*/
static int traverse_getkeys(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	struct getkeys_params *params = (struct getkeys_params *)p;
	struct getkeys_rec *recs;
	uint32_t lmaster;

	lmaster = ctdb_lmaster(params->ctdb, &key);

	/* only include this record if the lmaster matches or if
	   the wildcard lmaster (-1) was specified.
	*/
	if ((params->lmaster != CTDB_LMASTER_ANY) && (params->lmaster != lmaster)) {
		return 0;
	}

	/* grow via a temporary so the existing array is not lost if the
	   realloc fails */
	recs = talloc_realloc(NULL, params->recs, struct getkeys_rec, params->rec_count+1);
	if (recs == NULL) {
		goto nomem;
	}
	params->recs = recs;

	/* the traverse buffers are only valid during the callback, so
	   take private copies hanging off the array */
	key.dptr = talloc_memdup(params->recs, key.dptr, key.dsize);
	data.dptr = talloc_memdup(params->recs, data.dptr, data.dsize);
	if (key.dptr == NULL || data.dptr == NULL) {
		goto nomem;
	}

	params->recs[params->rec_count].key = key;
	params->recs[params->rec_count].data = data;
	params->rec_count++;

	return 0;

nomem:
	DEBUG(0,(__location__ " Out of memory while collecting records\n"));
	talloc_free(params->recs);
	params->recs = NULL;
	params->rec_count = 0;
	return -1;
}

/*
  pull a bunch of records from a ltdb, filtering by lmaster

  indata is a struct ctdb_control_pulldb naming the database and the
  lmaster to filter on. On success outdata holds a
  struct ctdb_control_pulldb_reply followed by the marshalled records.
  Only allowed while frozen.

  returns 0 on success, -1 on any failure (outdata is then left unset)
*/
int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_control_pulldb *pull;
	struct ctdb_db_context *ctdb_db;
	struct getkeys_params params;
	struct ctdb_control_pulldb_reply *reply = NULL;
	uint32_t i;
	int ret;
	size_t len = 0;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_pull_db when not frozen\n"));
		return -1;
	}

	pull = (struct ctdb_control_pulldb *)indata.dptr;

	ctdb_db = find_ctdb_db(ctdb, pull->db_id);
	if (!ctdb_db) {
		DEBUG(0,(__location__ " Unknown db\n"));
		return -1;
	}

	params.ctdb = ctdb;
	params.lmaster = pull->lmaster;
	params.rec_count = 0;
	params.recs = talloc_array(outdata, struct getkeys_rec, 0);
	CTDB_NO_MEMORY(ctdb, params.recs);

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		talloc_free(params.recs);
		return -1;
	}

	ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_getkeys, &params);

	ctdb_lock_all_databases_unmark(ctdb);

	/* the callback signals an aborted collection by clearing recs */
	if (ret < 0 || params.recs == NULL) {
		DEBUG(0,(__location__ " traverse failed in ctdb_control_pull_db\n"));
		goto failed;
	}

	reply = talloc(outdata, struct ctdb_control_pulldb_reply);
	if (reply == NULL) {
		DEBUG(0,(__location__ " Out of memory allocating pulldb reply\n"));
		goto failed;
	}

	reply->db_id = pull->db_id;
	reply->count = params.rec_count;

	len = offsetof(struct ctdb_control_pulldb_reply, data);

	/* append each record in marshalled form to the end of the reply */
	for (i=0;i<params.rec_count;i++) {
		struct ctdb_rec_data *rec;
		struct ctdb_control_pulldb_reply *grown;

		rec = ctdb_marshall_record(outdata, 0, params.recs[i].key, params.recs[i].data);
		if (rec == NULL) {
			DEBUG(0,(__location__ " Failed to marshall record\n"));
			goto failed;
		}
		grown = talloc_realloc_size(outdata, reply, rec->length + len);
		if (grown == NULL) {
			DEBUG(0,(__location__ " Out of memory growing pulldb reply\n"));
			talloc_free(rec);
			goto failed;
		}
		reply = grown;
		memcpy(len+(uint8_t *)reply, rec, rec->length);
		len += rec->length;
		talloc_free(rec);
	}

	talloc_free(params.recs);

	outdata->dptr = (uint8_t *)reply;
	outdata->dsize = len;

	return 0;

failed:
	/* talloc_free(NULL) is harmless, so both can be released blindly */
	talloc_free(reply);
	talloc_free(params.recs);
	return -1;
}
/*
  push a bunch of records into a ltdb, filtering by rsn

  indata is a struct ctdb_control_pulldb_reply followed by reply->count
  marshalled records. A record is stored only if its rsn is newer than
  the local copy, or equal when this node is not the local dmaster.
  Every record is checked against the size of the received buffer
  before it is touched. Only allowed while frozen.

  returns 0 on success, -1 on any failure
*/
int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr;
	const size_t rec_hdr_len = offsetof(struct ctdb_rec_data, data);
	struct ctdb_db_context *ctdb_db;
	struct ctdb_rec_data *rec;
	size_t remaining;
	uint32_t i;
	int ret;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_push_db when not frozen\n"));
		return -1;
	}

	if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) {
		DEBUG(0,(__location__ " invalid data in pulldb reply\n"));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, reply->db_id);
	if (!ctdb_db) {
		DEBUG(0,(__location__ " Unknown db 0x%08x\n", reply->db_id));
		return -1;
	}

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		return -1;
	}

	rec = (struct ctdb_rec_data *)&reply->data[0];
	/* bytes of record data that were actually received */
	remaining = indata.dsize - offsetof(struct ctdb_control_pulldb_reply, data);

	DEBUG(3,("starting push of %u records for dbid 0x%x\n",
		 reply->count, reply->db_id));

	for (i=0;i<reply->count;i++) {
		TDB_DATA key, data;
		struct ctdb_ltdb_header *hdr, header;

		/* the record header, the whole record, and the key and
		   data it claims to carry must all lie inside the
		   buffer we were sent */
		if (remaining < rec_hdr_len ||
		    rec->length < rec_hdr_len ||
		    rec->length > remaining ||
		    (uint64_t)rec->keylen + (uint64_t)rec->datalen >
		    (uint64_t)(rec->length - rec_hdr_len)) {
			DEBUG(0,(__location__ " truncated or corrupt record in push_db\n"));
			goto failed;
		}

		key.dptr = &rec->data[0];
		key.dsize = rec->keylen;
		data.dptr = &rec->data[key.dsize];
		data.dsize = rec->datalen;

		if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
			DEBUG(0,(__location__ " bad ltdb record\n"));
			goto failed;
		}
		/* split the ltdb header off the front of the data */
		hdr = (struct ctdb_ltdb_header *)data.dptr;
		data.dptr += sizeof(*hdr);
		data.dsize -= sizeof(*hdr);

		ret = ctdb_ltdb_fetch(ctdb_db, key, &header, NULL, NULL);
		if (ret != 0) {
			DEBUG(0, (__location__ " Unable to fetch record\n"));
			goto failed;
		}
		/* The check for dmaster gives priority to the dmaster
		   if the rsn values are equal */
		if (header.rsn < hdr->rsn ||
		    (header.dmaster != ctdb->vnn && header.rsn == hdr->rsn)) {
			ret = ctdb_ltdb_store(ctdb_db, key, hdr, data);
			if (ret != 0) {
				DEBUG(0, (__location__ " Unable to store record\n"));
				goto failed;
			}
		}

		/* step to the next marshalled record */
		remaining -= rec->length;
		rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
	}

	DEBUG(3,("finished push of %u records for dbid 0x%x\n",
		 reply->count, reply->db_id));

	ctdb_lock_all_databases_unmark(ctdb);
	return 0;

failed:
	ctdb_lock_all_databases_unmark(ctdb);
	return -1;
}
/*
  tdb traverse callback: rewrite the dmaster field in the ltdb header of
  a record and store the record back

  p points at the uint32_t dmaster to set. Records too short to contain
  a ltdb header are left alone rather than written past their end.

  returns 0 to continue, or the tdb_store() error to abort the traverse
*/
static int traverse_setdmaster(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	uint32_t *dmaster = (uint32_t *)p;
	struct ctdb_ltdb_header *header = (struct ctdb_ltdb_header *)data.dptr;
	int ret;

	/* same guard as the other traverse callbacks in this file: do not
	   touch the header unless the record is big enough to hold one */
	if (data.dsize < sizeof(*header)) {
		DEBUG(0,(__location__ " skipping record without ltdb header\n"));
		return 0;
	}

	header->dmaster = *dmaster;

	ret = tdb_store(tdb, key, data, TDB_REPLACE);
	if (ret) {
		DEBUG(0,(__location__ " failed to write tdb data back ret:%d\n",ret));
		return ret;
	}
	return 0;
}
/*
  set the dmaster of every record in one database

  indata is a struct ctdb_control_set_dmaster naming the database and
  the new dmaster. Only allowed while frozen.

  returns 0 once the traverse has run, -1 if not frozen, the database is
  unknown or the databases cannot be marked as locked
*/
int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_set_dmaster *req = (struct ctdb_control_set_dmaster *)indata.dptr;
	struct ctdb_db_context *db;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_set_dmaster when not frozen\n"));
		return -1;
	}

	db = find_ctdb_db(ctdb, req->db_id);
	if (db == NULL) {
		DEBUG(0,(__location__ " Unknown db 0x%08x\n", req->db_id));
		return -1;
	}

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		return -1;
	}

	/* the result of the traverse is not examined */
	tdb_traverse(db->ltdb->tdb, traverse_setdmaster, &req->dmaster);

	ctdb_lock_all_databases_unmark(ctdb);

	return 0;
}
/*
  state kept while the "recovered" event script runs on behalf of a
  set_recmode control
*/
struct ctdb_set_recmode_state {
/* the control request, answered from ctdb_recovered_callback() */
struct ctdb_req_control *c;
/* recovery mode to apply if the event script succeeds */
uint32_t recmode;
};
/*
  completion handler for the "recovered" event script

  Monitoring is restarted, the requested recovery mode is applied only
  if the script succeeded, and the deferred control is answered with the
  script status. The state (and the request it owns) is then released.
*/
static void ctdb_recovered_callback(struct ctdb_context *ctdb, int status, void *p)
{
	struct ctdb_set_recmode_state *state = talloc_get_type(p, struct ctdb_set_recmode_state);

	ctdb_start_monitoring(ctdb);

	if (status != 0) {
		DEBUG(0,(__location__ " recovered event script failed (status %d)\n", status));
	} else {
		ctdb->recovery_mode = state->recmode;
	}

	ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
	talloc_free(state);
}
/*
  set the recovery mode

  indata holds the new mode as a uint32_t. Only allowed while frozen.
  Every transition except ACTIVE -> NORMAL is applied immediately. When
  leaving recovery, monitoring is stopped and the "recovered" event
  script is run; the control is then answered asynchronously from
  ctdb_recovered_callback() and *async_reply is set to true.

  returns 0 on success (or when the reply is deferred), non-zero on
  failure; *errormsg is set when the node is not frozen
*/
int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
				 struct ctdb_req_control *c,
				 TDB_DATA indata, bool *async_reply,
				 const char **errormsg)
{
	uint32_t recmode = *(uint32_t *)indata.dptr;
	int ret;
	struct ctdb_set_recmode_state *state;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("Attempt to change recovery mode to %u when not frozen\n",
			 recmode));
		(*errormsg) = "Cannot change recovery mode while not frozen";
		return -1;
	}

	if (recmode != CTDB_RECOVERY_NORMAL ||
	    ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) {
		ctdb->recovery_mode = recmode;
		return 0;
	}

	/* some special handling when ending recovery mode */
	state = talloc(ctdb, struct ctdb_set_recmode_state);
	CTDB_NO_MEMORY(ctdb, state);

	/* we should not be able to get the lock on the nodes list, as it should be
	   held by the recovery master */
	if (ctdb_recovery_lock(ctdb, false)) {
		DEBUG(0,("ERROR: recovery lock file %s not locked when recovering!\n",
			 ctdb->recovery_lock_file));
		/* nothing else references the state yet - do not leak it */
		talloc_free(state);
		return -1;
	}

	state->c = talloc_steal(state, c);
	state->recmode = recmode;

	ctdb_stop_monitoring(ctdb);

	/* call the events script to tell all subsystems that we have recovered */
	ret = ctdb_event_script_callback(ctdb,
					 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
					 state,
					 ctdb_recovered_callback,
					 state, "recovered");
	if (ret != 0) {
		/* NOTE(review): state now owns the request c, so it is not
		   freed here; monitoring also stays stopped on this path -
		   confirm how the caller is expected to recover */
		return ret;
	}
	*async_reply = true;

	return 0;
}
/*
  tdb traverse callback for ctdb_control_max_rsn

  p points at the running maximum; it is raised to the rsn of this
  record when that is larger. Records too short to hold a ltdb header
  are ignored. Always continues the traverse.
*/
static int traverse_max_rsn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	uint64_t *max_rsn = (uint64_t *)p;
	struct ctdb_ltdb_header *h;

	if (data.dsize < sizeof(*h)) {
		return 0;
	}

	h = (struct ctdb_ltdb_header *)data.dptr;
	if (h->rsn > *max_rsn) {
		*max_rsn = h->rsn;
	}

	return 0;
}
/*
  get max rsn across an entire db

  indata holds the database id as a uint32_t; on success outdata holds
  the largest rsn found as a uint64_t. Only allowed while frozen. The
  database lock marks are released on every path after they were taken.

  returns 0 on success, -1 on failure
*/
int32_t ctdb_control_max_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_db_context *ctdb_db;
	uint32_t db_id = *(uint32_t *)indata.dptr;
	uint64_t max_rsn = 0;
	int ret;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_max_rsn when not frozen\n"));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, db_id);
	if (!ctdb_db) {
		DEBUG(0,(__location__ " Unknown db\n"));
		return -1;
	}

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		return -1;
	}

	ret = tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_max_rsn, &max_rsn);

	/* drop the marks before looking at the result, so that a failed
	   traverse does not leave the databases marked as locked */
	ctdb_lock_all_databases_unmark(ctdb);

	if (ret < 0) {
		DEBUG(0,(__location__ " traverse failed in ctdb_control_max_rsn\n"));
		return -1;
	}

	outdata->dptr = (uint8_t *)talloc(outdata, uint64_t);
	if (!outdata->dptr) {
		return -1;
	}
	(*(uint64_t *)outdata->dptr) = max_rsn;
	outdata->dsize = sizeof(uint64_t);

	return 0;
}
/*
  tdb traverse callback for ctdb_control_set_rsn_nonempty

  p points at the rsn to apply. Only records that carry data beyond the
  ltdb header are rewritten; all others are skipped.

  returns 0 to continue, -1 if the record could not be stored
*/
static int traverse_set_rsn_nonempty(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	uint64_t *rsn = (uint64_t *)p;
	struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;

	/* header only (or shorter) means the record is empty */
	if (data.dsize <= sizeof(*h)) {
		return 0;
	}

	h->rsn = *rsn;

	return (tdb_store(tdb, key, data, TDB_REPLACE) != 0) ? -1 : 0;
}
/*
  set rsn for all non-empty records in a database to a given rsn

  indata is a struct ctdb_control_set_rsn_nonempty naming the database
  and the rsn. Only allowed while frozen. The database lock marks are
  released on every path after they were taken.

  returns 0 on success, -1 on failure
*/
int32_t ctdb_control_set_rsn_nonempty(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_control_set_rsn_nonempty *p = (struct ctdb_control_set_rsn_nonempty *)indata.dptr;
	struct ctdb_db_context *ctdb_db;
	int ret;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_set_rsn_nonempty when not frozen\n"));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, p->db_id);
	if (!ctdb_db) {
		DEBUG(0,(__location__ " Unknown db\n"));
		return -1;
	}

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		return -1;
	}

	ret = tdb_traverse(ctdb_db->ltdb->tdb, traverse_set_rsn_nonempty, &p->rsn);

	/* drop the marks before looking at the result, so that a failed
	   traverse does not leave the databases marked as locked */
	ctdb_lock_all_databases_unmark(ctdb);

	if (ret < 0) {
		DEBUG(0,(__location__ " traverse failed in ctdb_control_set_rsn_nonempty\n"));
		return -1;
	}

	return 0;
}
/*
  tdb traverse callback for ctdb_control_delete_low_rsn

  p points at the rsn threshold. A record is deleted when it is too
  short to hold a ltdb header or when its rsn is below the threshold.

  returns 0 to continue, -1 if the delete failed
*/
static int traverse_delete_low_rsn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	uint64_t *rsn = (uint64_t *)p;
	struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;

	/* keep records that have a full header and a high enough rsn */
	if (data.dsize >= sizeof(*h) && h->rsn >= *rsn) {
		return 0;
	}

	return (tdb_delete(tdb, key) != 0) ? -1 : 0;
}
/*
  delete any records with a rsn < the given rsn

  indata is a struct ctdb_control_delete_low_rsn naming the database and
  the threshold. Only allowed while frozen. The database lock marks are
  released on every path after they were taken.

  returns 0 on success, -1 on failure
*/
int32_t ctdb_control_delete_low_rsn(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
{
	struct ctdb_control_delete_low_rsn *p = (struct ctdb_control_delete_low_rsn *)indata.dptr;
	struct ctdb_db_context *ctdb_db;
	int ret;

	if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
		DEBUG(0,("rejecting ctdb_control_delete_low_rsn when not frozen\n"));
		return -1;
	}

	ctdb_db = find_ctdb_db(ctdb, p->db_id);
	if (!ctdb_db) {
		DEBUG(0,(__location__ " Unknown db\n"));
		return -1;
	}

	if (ctdb_lock_all_databases_mark(ctdb) != 0) {
		DEBUG(0,(__location__ " Failed to get lock on entired db - failing\n"));
		return -1;
	}

	ret = tdb_traverse(ctdb_db->ltdb->tdb, traverse_delete_low_rsn, &p->rsn);

	/* drop the marks before looking at the result, so that a failed
	   traverse does not leave the databases marked as locked */
	ctdb_lock_all_databases_unmark(ctdb);

	if (ret < 0) {
		DEBUG(0,(__location__ " traverse failed in ctdb_control_delete_low_rsn\n"));
		return -1;
	}

	return 0;
}
/*
  try and get the recovery lock in shared storage - should only work
  on the recovery master recovery daemon. Anywhere else is a bug

  Any descriptor left from a previous call is closed first. The lock
  file is opened (created if missing) and a non-blocking write lock is
  requested on its first byte. With keep == false the descriptor is
  closed again straight away, so the call only tests whether the lock
  could be obtained. If the lock is not obtained the descriptor is
  closed and ctdb->recovery_lock_fd is reset to -1.

  returns true if the lock was obtained, false otherwise
*/
bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep)
{
	struct flock lock;

	if (ctdb->recovery_lock_fd != -1) {
		close(ctdb->recovery_lock_fd);
	}
	ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file, O_RDWR|O_CREAT, 0600);
	if (ctdb->recovery_lock_fd == -1) {
		DEBUG(0,("Unable to open %s - (%s)\n",
			 ctdb->recovery_lock_file, strerror(errno)));
		return false;
	}

	lock.l_type = F_WRLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 1;
	lock.l_pid = 0;

	if (fcntl(ctdb->recovery_lock_fd, F_SETLK, &lock) != 0) {
		/* someone else holds the lock - do not keep a useless
		   descriptor around until the next call */
		close(ctdb->recovery_lock_fd);
		ctdb->recovery_lock_fd = -1;
		return false;
	}

	if (!keep) {
		close(ctdb->recovery_lock_fd);
		ctdb->recovery_lock_fd = -1;
	}

	return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -1,469 +0,0 @@
/*
ctdb main protocol code
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "lib/events/events.h"
#include "lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
/*
  choose the transport we will use

  Stores a copy of the transport name in the ctdb context.

  returns 0 on success, -1 if the name could not be copied
*/
int ctdb_set_transport(struct ctdb_context *ctdb, const char *transport)
{
	ctdb->transport = talloc_strdup(ctdb, transport);
	/* a NULL result for a non-NULL name means the allocation failed;
	   a NULL name is still accepted as before */
	if (transport != NULL && ctdb->transport == NULL) {
		return -1;
	}
	return 0;
}
/*
  choose the recovery lock file

  Stores a copy of the file name in the ctdb context.

  returns 0 on success, -1 if the name could not be copied
*/
int ctdb_set_recovery_lock_file(struct ctdb_context *ctdb, const char *file)
{
	ctdb->recovery_lock_file = talloc_strdup(ctdb, file);
	/* a NULL result for a non-NULL name means the allocation failed;
	   a NULL name is still accepted as before */
	if (file != NULL && ctdb->recovery_lock_file == NULL) {
		return -1;
	}
	return 0;
}
/*
  choose the logfile location

  Stores a copy of the name in the ctdb context. Unless the name is "-",
  the file is opened for appending (created if missing) and stdout and
  stderr are redirected to it. Aborts if the file cannot be opened.

  always returns 0
*/
int ctdb_set_logfile(struct ctdb_context *ctdb, const char *logfile)
{
	ctdb->logfile = talloc_strdup(ctdb, logfile);
	if (ctdb->logfile != NULL && strcmp(logfile, "-") != 0) {
		int fd;

		fd = open(ctdb->logfile, O_WRONLY|O_APPEND|O_CREAT, 0666);
		if (fd == -1) {
			printf("Failed to open logfile %s\n", ctdb->logfile);
			abort();
		}

		/* dup2() closes the target descriptor itself. Closing 1 and
		   2 by hand first would destroy the log descriptor if
		   open() happened to return one of them */
		if (fd != 1) {
			dup2(fd, 1);
		}
		/* also catch stderr of subcommands to the log file */
		dup2(1, 2);

		/* the original descriptor is no longer needed, unless it is
		   one of the two we have just set up */
		if (fd != 1 && fd != 2) {
			close(fd);
		}
	}
	return 0;
}
/*
  set the directory for the local databases

  Stores a copy of the directory name in the ctdb context.

  returns 0 on success, -1 if the copy could not be made
*/
int ctdb_set_tdb_dir(struct ctdb_context *ctdb, const char *dir)
{
	ctdb->db_directory = talloc_strdup(ctdb, dir);

	return (ctdb->db_directory != NULL) ? 0 : -1;
}
/*
  add a node to the list of active nodes

  nstr is the textual address of the node. The node array is grown by
  one, the address is parsed and the node is given the next vnn. Nodes
  start out disconnected, except the one whose address matches our own,
  which also defines ctdb->vnn.

  returns 0 on success, -1 on parse failure or out of memory
*/
static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr)
{
	struct ctdb_node *node, **nodep;

	nodep = talloc_realloc(ctdb, ctdb->nodes, struct ctdb_node *, ctdb->num_nodes+1);
	CTDB_NO_MEMORY(ctdb, nodep);

	ctdb->nodes = nodep;
	nodep = &ctdb->nodes[ctdb->num_nodes];
	(*nodep) = talloc_zero(ctdb->nodes, struct ctdb_node);
	CTDB_NO_MEMORY(ctdb, *nodep);
	node = *nodep;

	if (ctdb_parse_address(ctdb, node, nstr, &node->address) != 0) {
		return -1;
	}
	node->ctdb = ctdb;
	node->name = talloc_asprintf(node, "%s:%u",
				     node->address.address,
				     node->address.port);
	/* the name is printed in debug messages later on, so do not carry
	   on without it */
	CTDB_NO_MEMORY(ctdb, node->name);

	/* this assumes that the nodes are kept in sorted order, and no gaps */
	node->vnn = ctdb->num_nodes;

	/* nodes start out disconnected */
	node->flags |= NODE_FLAGS_DISCONNECTED;

	if (ctdb->address.address &&
	    ctdb_same_address(&ctdb->address, &node->address)) {
		/* this entry is us */
		ctdb->vnn = node->vnn;
		node->flags &= ~NODE_FLAGS_DISCONNECTED;
	}

	ctdb->num_nodes++;
	node->dead_count = 0;

	return 0;
}
/*
  setup the node list from a file

  Remembers the file name, loads the file line by line, ignores trailing
  empty lines and adds one node per remaining line. Afterwards the vnn
  map is initialised to the identity mapping over all nodes with
  generation 1.

  returns 0 on success, -1 if the file cannot be loaded, a node cannot
  be added or memory runs out
*/
int ctdb_set_nlist(struct ctdb_context *ctdb, const char *nlist)
{
	char **lines;
	int nlines;
	int i;

	talloc_free(ctdb->node_list_file);
	ctdb->node_list_file = talloc_strdup(ctdb, nlist);

	lines = file_lines_load(nlist, &nlines, ctdb);
	if (!lines) {
		ctdb_set_error(ctdb, "Failed to load nlist '%s'\n", nlist);
		return -1;
	}

	/* drop empty lines at the end of the file */
	for (; nlines > 0; nlines--) {
		if (strcmp(lines[nlines-1], "") != 0) {
			break;
		}
	}

	for (i = 0; i < nlines; i++) {
		if (ctdb_add_node(ctdb, lines[i]) == 0) {
			continue;
		}
		talloc_free(lines);
		return -1;
	}

	/* initialize the vnn mapping table now that we have num_nodes setup */
	/*
	   XXX we currently initialize it to the maximum number of nodes to
	   XXX make it behave the same way as previously.
	   XXX Once we have recovery working we should initialize this always to
	   XXX generation==0 (==invalid) and let the recovery tool populate this
	   XXX table for the daemons.
	*/
	ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
	CTDB_NO_MEMORY(ctdb, ctdb->vnn_map);

	ctdb->vnn_map->generation = 1;
	ctdb->vnn_map->size = ctdb->num_nodes;
	ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, ctdb->vnn_map->size);
	CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);

	/* identity mapping: slot n is served by node n */
	i = 0;
	while (i < ctdb->vnn_map->size) {
		ctdb->vnn_map->map[i] = i;
		i++;
	}

	talloc_free(lines);
	return 0;
}
/*
  setup the local node address

  Parses the textual address into ctdb->address and derives ctdb->name
  from it in "address:port" form.

  returns 0 on success, -1 if the address cannot be parsed
*/
int ctdb_set_address(struct ctdb_context *ctdb, const char *address)
{
	int rc = ctdb_parse_address(ctdb, ctdb, address, &ctdb->address);

	if (rc != 0) {
		return -1;
	}

	ctdb->name = talloc_asprintf(ctdb, "%s:%u",
				     ctdb->address.address,
				     ctdb->address.port);
	return 0;
}
/*
  return the number of active nodes

  Counts the entries of the vnn map whose node does not have any of the
  NODE_FLAGS_INACTIVE bits set.
*/
uint32_t ctdb_get_num_active_nodes(struct ctdb_context *ctdb)
{
	uint32_t active = 0;
	int slot;

	for (slot = 0; slot < ctdb->vnn_map->size; slot++) {
		uint32_t vnn = ctdb->vnn_map->map[slot];

		if (ctdb->nodes[vnn]->flags & NODE_FLAGS_INACTIVE) {
			continue;
		}
		active++;
	}

	return active;
}
/*
  called when we need to process a packet. This can be a requeued packet
  after a lockwait, or a real packet from another node

  Call and dmaster traffic is only accepted when the sender's generation
  matches ours; everything else is dispatched on the operation code and
  counted in the statistics.
*/
void ctdb_input_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	TALLOC_CTX *tmp_ctx;
	int needs_generation_check;

	/* place the packet as a child of the tmp_ctx. We then use
	   talloc_free() below to free it. If any of the calls want
	   to keep it, then they will steal it somewhere else, and the
	   talloc_free() will only free the tmp_ctx */
	tmp_ctx = talloc_new(ctdb);
	talloc_steal(tmp_ctx, hdr);

	DEBUG(3,(__location__ " ctdb request %u of type %u length %u from "
		 "node %u to %u\n", hdr->reqid, hdr->operation, hdr->length,
		 hdr->srcnode, hdr->destnode));

	/* for ctdb_call inter-node operations verify that the
	   remote node that sent us the call is running in the
	   same generation instance as this node
	*/
	needs_generation_check = (hdr->operation == CTDB_REQ_CALL ||
				  hdr->operation == CTDB_REPLY_CALL ||
				  hdr->operation == CTDB_REQ_DMASTER ||
				  hdr->operation == CTDB_REPLY_DMASTER);

	if (needs_generation_check &&
	    ctdb->vnn_map->generation != hdr->generation) {
		DEBUG(0,(__location__ " ctdb request %u"
			 " length %u from node %u to %u had an"
			 " invalid generation id:%u while our"
			 " generation id is:%u\n",
			 hdr->reqid, hdr->length,
			 hdr->srcnode, hdr->destnode,
			 hdr->generation, ctdb->vnn_map->generation));
		goto done;
	}

	switch (hdr->operation) {
	case CTDB_REQ_CALL:
		ctdb->statistics.node.req_call++;
		ctdb_request_call(ctdb, hdr);
		break;

	case CTDB_REPLY_CALL:
		ctdb->statistics.node.reply_call++;
		ctdb_reply_call(ctdb, hdr);
		break;

	case CTDB_REPLY_ERROR:
		ctdb->statistics.node.reply_error++;
		ctdb_reply_error(ctdb, hdr);
		break;

	case CTDB_REQ_DMASTER:
		ctdb->statistics.node.req_dmaster++;
		ctdb_request_dmaster(ctdb, hdr);
		break;

	case CTDB_REPLY_DMASTER:
		ctdb->statistics.node.reply_dmaster++;
		ctdb_reply_dmaster(ctdb, hdr);
		break;

	case CTDB_REQ_MESSAGE:
		ctdb->statistics.node.req_message++;
		ctdb_request_message(ctdb, hdr);
		break;

	case CTDB_REQ_CONTROL:
		ctdb->statistics.node.req_control++;
		ctdb_request_control(ctdb, hdr);
		break;

	case CTDB_REPLY_CONTROL:
		ctdb->statistics.node.reply_control++;
		ctdb_reply_control(ctdb, hdr);
		break;

	case CTDB_REQ_KEEPALIVE:
		/* keepalives are only counted, there is nothing to process */
		ctdb->statistics.keepalive_packets_recv++;
		break;

	default:
		DEBUG(0,("%s: Packet with unknown operation %u\n",
			 __location__, hdr->operation));
		break;
	}

done:
	talloc_free(tmp_ctx);
}
/*
called by the transport layer when a node is dead
*/
void ctdb_node_dead(struct ctdb_node *node)
{
if (node->flags & NODE_FLAGS_DISCONNECTED) {
DEBUG(1,("%s: node %s is already marked disconnected: %u connected\n",
node->ctdb->name, node->name,
node->ctdb->num_connected));
return;
}
node->ctdb->num_connected--;
node->flags |= NODE_FLAGS_DISCONNECTED;
node->rx_cnt = 0;
node->dead_count = 0;
DEBUG(1,("%s: node %s is dead: %u connected\n",
node->ctdb->name, node->name, node->ctdb->num_connected));
ctdb_daemon_cancel_controls(node->ctdb, node);
}
/*
called by the transport layer when a node is connected
*/
void ctdb_node_connected(struct ctdb_node *node)
{
if (!(node->flags & NODE_FLAGS_DISCONNECTED)) {
DEBUG(1,("%s: node %s is already marked connected: %u connected\n",
node->ctdb->name, node->name,
node->ctdb->num_connected));
return;
}
node->ctdb->num_connected++;
node->dead_count = 0;
node->flags &= ~NODE_FLAGS_DISCONNECTED;
DEBUG(1,("%s: connected to %s - %u connected\n",
node->ctdb->name, node->name, node->ctdb->num_connected));
}
/*
  a packet addressed to ourselves, waiting to be processed from the
  event loop
*/
struct queue_next {
/* daemon context the packet is delivered to */
struct ctdb_context *ctdb;
/* private copy of the packet, handed to ctdb_input_pkt() */
struct ctdb_req_header *hdr;
};
/*
  triggered when a deferred packet is due

  Hands the stored packet to ctdb_input_pkt() and then releases the
  queue entry.
*/
static void queue_next_trigger(struct event_context *ev, struct timed_event *te,
			       struct timeval t, void *private_data)
{
	struct queue_next *deferred = talloc_get_type(private_data, struct queue_next);

	ctdb_input_pkt(deferred->ctdb, deferred->hdr);
	talloc_free(deferred);
}
/*
  defer a packet, so it is processed on the next event loop
  this is used for sending packets to ourselves

  The packet is copied, so the caller keeps ownership of hdr. If either
  allocation fails the packet is dropped with a log message and nothing
  is left allocated.
*/
static void ctdb_defer_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct queue_next *q;

	q = talloc(ctdb, struct queue_next);
	if (q == NULL) {
		DEBUG(0,(__location__ " Failed to allocate deferred packet\n"));
		return;
	}
	q->ctdb = ctdb;
	q->hdr = talloc_memdup(ctdb, hdr, hdr->length);
	if (q->hdr == NULL) {
		DEBUG(0,("Error copying deferred packet to self\n"));
		/* the queue entry is useless without its packet */
		talloc_free(q);
		return;
	}
#if 0
	/* use this to put packets directly into our recv function */
	ctdb_input_pkt(q->ctdb, q->hdr);
#else
	event_add_timed(ctdb->ev, q, timeval_zero(), queue_next_trigger, q);
#endif
}
/*
  broadcast a packet to all nodes

  The destination in the header is rewritten for each configured node
  in turn before the packet is queued.
*/
static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb,
				      struct ctdb_req_header *hdr)
{
	int idx = 0;

	while (idx < ctdb->num_nodes) {
		hdr->destnode = ctdb->nodes[idx]->vnn;
		ctdb_queue_packet(ctdb, hdr);
		idx++;
	}
}
/*
  broadcast a packet to all nodes in the current vnnmap

  The destination in the header is rewritten for each vnn map entry in
  turn before the packet is queued.
*/
static void ctdb_broadcast_packet_vnnmap(struct ctdb_context *ctdb,
					 struct ctdb_req_header *hdr)
{
	int slot = 0;

	while (slot < ctdb->vnn_map->size) {
		hdr->destnode = ctdb->vnn_map->map[slot];
		ctdb_queue_packet(ctdb, hdr);
		slot++;
	}
}
/*
broadcast a packet to all connected nodes
*/
static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb,
struct ctdb_req_header *hdr)
{
int i;
for (i=0;i<ctdb->num_nodes;i++) {
if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) {
hdr->destnode = ctdb->nodes[i]->vnn;
ctdb_queue_packet(ctdb, hdr);
}
}
}
/*
  queue a packet or die

  The broadcast pseudo destinations are expanded by the broadcast
  helpers. A packet for ourselves is deferred to the event loop; any
  other packet is handed to the transport, and failure to queue it
  there is fatal. Packets for a non-existent node are dropped with a
  log message.
*/
void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
{
	struct ctdb_node *node;

	if (hdr->destnode == CTDB_BROADCAST_ALL) {
		ctdb_broadcast_packet_all(ctdb, hdr);
		return;
	}
	if (hdr->destnode == CTDB_BROADCAST_VNNMAP) {
		ctdb_broadcast_packet_vnnmap(ctdb, hdr);
		return;
	}
	if (hdr->destnode == CTDB_BROADCAST_CONNECTED) {
		ctdb_broadcast_packet_connected(ctdb, hdr);
		return;
	}

	/* counted before validation, so rejected packets are included */
	ctdb->statistics.node_packets_sent++;

	if (!ctdb_validate_vnn(ctdb, hdr->destnode)) {
		DEBUG(0,(__location__ " cant send to node %u that does not exist\n",
			 hdr->destnode));
		return;
	}

	node = ctdb->nodes[hdr->destnode];

	if (hdr->destnode != ctdb->vnn) {
		node->tx_cnt++;
		if (ctdb->methods->queue_pkt(node, (uint8_t *)hdr, hdr->length) != 0) {
			ctdb_fatal(ctdb, "Unable to queue packet\n");
		}
		return;
	}

	/* a packet to ourselves never touches the transport */
	ctdb_defer_packet(ctdb, hdr);
}

View File

@ -1,822 +0,0 @@
/*
ctdb recovery code
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "lib/util/dlinklist.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
/* timeout for takeover/release controls, taken from the takeover_timeout
   tunable; expects a variable named 'ctdb' in the expanding scope */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
/* delay (passed as the first argument of timeval_current_ofs) between
   repeated gratuitous arp rounds */
#define CTDB_ARP_INTERVAL 1
/* number of arp rounds sent before the arp state is freed */
#define CTDB_ARP_REPEAT 3
/*
  state for the repeated gratuitous arp / tcp tickle sends that follow
  an IP takeover
*/
struct ctdb_takeover_arp {
struct ctdb_context *ctdb;
/* number of rounds sent so far; compared against CTDB_ARP_REPEAT */
uint32_t count;
/* the address that was taken over, passed to ctdb_sys_send_arp() */
struct sockaddr_in sin;
/* copies of the tcp connections that get a tickle ack each round */
struct ctdb_tcp_list *tcp_list;
};
/*
lists of tcp endpoints
(doubly linked; manipulated with the DLIST_ADD/DLIST_REMOVE macros)
*/
struct ctdb_tcp_list {
struct ctdb_tcp_list *prev, *next;
/* vnn of the node that reported this connection */
uint32_t vnn;
/* the two endpoints; daddr is the one matched against public IPs */
struct sockaddr_in saddr;
struct sockaddr_in daddr;
};
/*
list of clients to kill on IP release
(entries unlink themselves from ctdb->client_ip_list via a talloc destructor)
*/
struct ctdb_client_ip {
struct ctdb_client_ip *prev, *next;
/* back pointer used by the destructor to find the list head */
struct ctdb_context *ctdb;
/* the address the client registered (destination of its tcp connection) */
struct sockaddr_in ip;
/* request id used to look up the struct ctdb_client */
uint32_t client_id;
};
/*
  timed-event handler: send a gratuitous arp for a taken-over address
  and a tcp tickle ack for every connection recorded in the arp state.
  Re-arms itself every CTDB_ARP_INTERVAL until CTDB_ARP_REPEAT rounds
  have been sent, then frees the state.
*/
static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
				  struct timeval t, void *private_data)
{
	struct ctdb_takeover_arp *state;
	struct ctdb_tcp_list *conn;
	int rc;

	state = talloc_get_type(private_data, struct ctdb_takeover_arp);

	rc = ctdb_sys_send_arp(&state->sin, state->ctdb->takeover.interface);
	if (rc != 0) {
		DEBUG(0,(__location__ " sending of arp failed (%s)\n", strerror(errno)));
	}

	conn = state->tcp_list;
	while (conn != NULL) {
		DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n",
			 (unsigned)ntohs(conn->daddr.sin_port),
			 inet_ntoa(conn->saddr.sin_addr),
			 (unsigned)ntohs(conn->saddr.sin_port)));
		rc = ctdb_sys_send_tcp(&conn->saddr, &conn->daddr, 0, 0, 0);
		if (rc != 0) {
			DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n",
				 inet_ntoa(conn->saddr.sin_addr)));
		}
		conn = conn->next;
	}

	state->count++;
	if (state->count != CTDB_ARP_REPEAT) {
		/* more rounds to go: schedule the next one */
		event_add_timed(state->ctdb->ev, state->ctdb->takeover.last_ctx,
				timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
				ctdb_control_send_arp, state);
		return;
	}

	/* final round sent */
	talloc_free(state);
}
/*
  state carried from a takeover/release control to its event script callback
*/
struct takeover_callback_state {
/* the control request that is answered via ctdb_request_control_reply() */
struct ctdb_req_control *c;
/* copy of the public address being taken over or released */
struct sockaddr_in *sin;
};
/*
  called when takeip event finishes

  Restarts monitoring, then on success starts the gratuitous arp /
  tcp tickle sequence for the address and replies to the pending
  control with status 0. If the script failed the control is answered
  with the script status; if setting up the arp state fails it is
  answered with -1. The callback state is freed on every path.
*/
static void takeover_ip_callback(struct ctdb_context *ctdb, int status,
				 void *private_data)
{
	struct takeover_callback_state *state =
		talloc_get_type(private_data, struct takeover_callback_state);
	struct ctdb_takeover_arp *arp = NULL;
	char *ip = inet_ntoa(state->sin->sin_addr);
	struct ctdb_tcp_list *tcp;

	ctdb_start_monitoring(ctdb);

	if (status != 0) {
		DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
			 ip, ctdb->takeover.interface));
		ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
		talloc_free(state);
		return;
	}

	/* last_ctx owns all pending arp state, so that releasing an IP
	   can cancel it with a single talloc_free() */
	if (!ctdb->takeover.last_ctx) {
		ctdb->takeover.last_ctx = talloc_new(ctdb);
		if (!ctdb->takeover.last_ctx) goto failed;
	}

	arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
	if (!arp) goto failed;

	arp->ctdb = ctdb;
	arp->sin = *state->sin;

	/* add all of the known tcp connections for this IP to the
	   list of tcp connections to send tickle acks for */
	for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
		if (state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
			struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
			if (t2 == NULL) goto failed;
			*t2 = *tcp;
			DLIST_ADD(arp->tcp_list, t2);
		}
	}

	event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
			timeval_zero(), ctdb_control_send_arp, arp);

	/* the control succeeded */
	ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
	talloc_free(state);
	return;

failed:
	/* no timer references a half-built arp state, so drop it (and the
	   connection copies hanging off it) instead of leaving it on last_ctx */
	if (arp != NULL) {
		talloc_free(arp);
	}
	ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
	talloc_free(state);
	return;
}
/*
  take over an ip address

  Records the new owner in our node table and, unless the kernel
  already holds the address, runs the "takeip" event script
  asynchronously. The reply to the control is then sent from
  takeover_ip_callback().

  Returns 0 on success (with *async_reply set when the reply is
  deferred) and -1 on failure.
*/
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
				 struct ctdb_req_control *c,
				 TDB_DATA indata,
				 bool *async_reply)
{
	int ret;
	struct takeover_callback_state *state;
	struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
	char *ip = inet_ntoa(pip->sin.sin_addr);

	/* update out node table */
	ctdb->nodes[pip->vnn]->takeover_vnn = pip->takeover_vnn;

	/* if our kernel already has this IP, do nothing */
	if (ctdb_sys_have_ip(ip)) {
		return 0;
	}

	state = talloc(ctdb, struct takeover_callback_state);
	CTDB_NO_MEMORY(ctdb, state);

	/* the address copy hangs off state, so every talloc_free(state)
	   (here and in the callback) releases it too */
	state->sin = talloc(state, struct sockaddr_in);
	CTDB_NO_MEMORY(ctdb, state->sin);
	*state->sin = pip->sin;

	/* only take the request once nothing above can bail out */
	state->c = talloc_steal(ctdb, c);

	DEBUG(0,("Takover of IP %s/%u on interface %s\n",
		 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
		 ctdb->takeover.interface));

	ctdb_stop_monitoring(ctdb);

	ret = ctdb_event_script_callback(ctdb,
					 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
					 state, takeover_ip_callback, state,
					 "takeip %s %s %u",
					 ctdb->takeover.interface,
					 ip,
					 ctdb->nodes[ctdb->vnn]->public_netmask_bits);
	if (ret != 0) {
		DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
			 ip, ctdb->takeover.interface));
		talloc_free(state);
		return -1;
	}

	/* tell ctdb_control.c that we will be replying asynchronously */
	*async_reply = true;
	return 0;
}
/*
kill any clients that are registered with a IP that is being released
*/
static void release_kill_clients(struct ctdb_context *ctdb, struct in_addr in)
{
struct ctdb_client_ip *ip;
for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
if (ip->ip.sin_addr.s_addr == in.s_addr) {
struct ctdb_client *client = ctdb_reqid_find(ctdb,
ip->client_id,
struct ctdb_client);
if (client->pid != 0) {
DEBUG(0,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
(unsigned)client->pid, inet_ntoa(in),
ip->client_id));
kill(client->pid, SIGKILL);
}
}
}
}
/*
called when releaseip event finishes
*/
static void release_ip_callback(struct ctdb_context *ctdb, int status,
void *private_data)
{
struct takeover_callback_state *state =
talloc_get_type(private_data, struct takeover_callback_state);
char *ip = inet_ntoa(state->sin->sin_addr);
TDB_DATA data;
struct ctdb_tcp_list *tcp;
ctdb_start_monitoring(ctdb);
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
data.dptr = (uint8_t *)ip;
data.dsize = strlen(ip)+1;
ctdb_daemon_send_message(ctdb, ctdb->vnn, CTDB_SRVID_RELEASE_IP, data);
/* kill clients that have registered with this IP */
release_kill_clients(ctdb, state->sin->sin_addr);
/* tell other nodes about any tcp connections we were holding with this IP */
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
if (tcp->vnn == ctdb->vnn &&
state->sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
struct ctdb_control_tcp_vnn t;
t.vnn = ctdb->vnn;
t.src = tcp->saddr;
t.dest = tcp->daddr;
data.dptr = (uint8_t *)&t;
data.dsize = sizeof(t);
ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
CTDB_CONTROL_TCP_ADD,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
}
}
/* the control succeeded */
ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
talloc_free(state);
}
/*
  release an ip address

  Records the new owner in our node table. If the kernel holds the
  address, any pending arp sends are cancelled and the "releaseip"
  event script is run asynchronously; release_ip_callback() then
  answers the control.

  Returns 0 on success (with *async_reply set when the reply is
  deferred) and -1 on failure.
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb,
				struct ctdb_req_control *c,
				TDB_DATA indata,
				bool *async_reply)
{
	struct ctdb_public_ip *req = (struct ctdb_public_ip *)indata.dptr;
	char *addr_str = inet_ntoa(req->sin.sin_addr);
	struct takeover_callback_state *cb_state;
	int rc;

	/* update out node table */
	ctdb->nodes[req->vnn]->takeover_vnn = req->takeover_vnn;

	if (!ctdb_sys_have_ip(addr_str)) {
		/* we don't hold it, nothing to release */
		return 0;
	}

	DEBUG(0,("Release of IP %s/%u on interface %s\n",
		 addr_str, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
		 ctdb->takeover.interface));

	/* stop any previous arps */
	talloc_free(ctdb->takeover.last_ctx);
	ctdb->takeover.last_ctx = NULL;

	cb_state = talloc(ctdb, struct takeover_callback_state);
	CTDB_NO_MEMORY(ctdb, cb_state);

	cb_state->c = talloc_steal(cb_state, c);
	cb_state->sin = talloc(cb_state, struct sockaddr_in);
	CTDB_NO_MEMORY(ctdb, cb_state->sin);
	*cb_state->sin = req->sin;

	ctdb_stop_monitoring(ctdb);

	rc = ctdb_event_script_callback(ctdb,
					timeval_current_ofs(ctdb->tunable.script_timeout, 0),
					cb_state, release_ip_callback, cb_state,
					"releaseip %s %s %u",
					ctdb->takeover.interface,
					addr_str,
					ctdb->nodes[ctdb->vnn]->public_netmask_bits);
	if (rc != 0) {
		DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
			 addr_str, ctdb->takeover.interface));
		talloc_free(cb_state);
		return -1;
	}

	/* tell the control that we will be reply asynchronously */
	*async_reply = true;
	return 0;
}
/*
  remember the event script to use: stores a talloc copy of 'script'
  in ctdb->takeover.event_script and returns 0
  (CTDB_NO_MEMORY handles a failed copy)
*/
int ctdb_set_event_script(struct ctdb_context *ctdb, const char *script)
{
	char *script_copy;

	script_copy = talloc_strdup(ctdb, script);
	ctdb->takeover.event_script = script_copy;
	CTDB_NO_MEMORY(ctdb, ctdb->takeover.event_script);

	return 0;
}
/*
  setup the public address list from a file

  The file must contain exactly one "address/netmask-bits" line per
  node (trailing empty lines are ignored). For every node the address
  string and the netmask length are stored on the node and
  takeover_vnn is reset to -1.

  Returns 0 on success, -1 on any load, count or syntax error. The
  loaded line array is released on every path.
*/
int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
{
	char **lines;
	int nlines;
	int i;

	lines = file_lines_load(alist, &nlines, ctdb);
	if (lines == NULL) {
		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
		return -1;
	}
	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
		nlines--;
	}

	if (nlines != ctdb->num_nodes) {
		DEBUG(0,("Number of lines in %s does not match number of nodes!\n", alist));
		goto failed;
	}

	for (i=0;i<nlines;i++) {
		char *p;
		struct in_addr in;
		int bits;

		ctdb->nodes[i]->public_address = talloc_strdup(ctdb->nodes[i], lines[i]);
		if (ctdb->nodes[i]->public_address == NULL) {
			/* CTDB_NO_MEMORY bails out below; don't leave lines behind */
			talloc_free(lines);
		}
		CTDB_NO_MEMORY(ctdb, ctdb->nodes[i]->public_address);
		ctdb->nodes[i]->takeover_vnn = -1;

		/* see if they supplied a netmask length */
		p = strchr(ctdb->nodes[i]->public_address, '/');
		if (!p) {
			DEBUG(0,("You must supply a netmask for public address %s\n",
				 ctdb->nodes[i]->public_address));
			goto failed;
		}
		*p = 0;

		/* range-check the parsed value before storing it, so that an
		   out-of-range number cannot wrap into a valid-looking one
		   in a narrower field */
		bits = atoi(p+1);
		if (bits < 0 || bits > 32) {
			DEBUG(0, ("Illegal netmask for IP %s\n", ctdb->nodes[i]->public_address));
			goto failed;
		}
		ctdb->nodes[i]->public_netmask_bits = bits;

		if (inet_aton(ctdb->nodes[i]->public_address, &in) == 0) {
			DEBUG(0,("Badly formed IP '%s' in public address list\n", ctdb->nodes[i]->public_address));
			goto failed;
		}
	}

	talloc_free(lines);
	return 0;

failed:
	talloc_free(lines);
	return -1;
}
/*
  see if two IPs are on the same subnet

  ip1 and ip2 are dotted-quad strings, netmask_bits is the prefix
  length (0..32). Returns true when both addresses parse and agree in
  their first netmask_bits bits. A NULL or unparsable address, or a
  prefix length above 32, yields false.
*/
static bool ctdb_same_subnet(const char *ip1, const char *ip2, uint8_t netmask_bits)
{
	struct in_addr in1, in2;
	uint32_t mask;

	if (ip1 == NULL || ip2 == NULL || netmask_bits > 32) {
		return false;
	}

	/* never compare addresses that inet_aton() did not fill in */
	if (inet_aton(ip1, &in1) == 0 || inet_aton(ip2, &in2) == 0) {
		return false;
	}

	/* a /0 prefix matches everything; handled separately because a
	   32-bit shift by 32 is undefined */
	if (netmask_bits == 0) {
		mask = 0;
	} else {
		mask = ~(uint32_t)0 << (32 - netmask_bits);
	}

	return (ntohl(in1.s_addr) & mask) == (ntohl(in2.s_addr) & mask);
}
/*
  look for a node that can take over start_node's public IP

  Candidates are visited round-robin starting after start_node. The
  first one that has none of mask_flags set in the nodemap and whose
  public address is on the same subnet becomes
  ctdb->nodes[start_node]->takeover_vnn. If none qualifies,
  takeover_vnn is left untouched.
*/
static void ctdb_takeover_find_node(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap,
				    int start_node, uint32_t mask_flags)
{
	int candidate = (start_node+1)%nodemap->num;

	while (candidate != start_node) {
		if (!(nodemap->nodes[candidate].flags & mask_flags) &&
		    ctdb_same_subnet(ctdb->nodes[candidate]->public_address,
				     ctdb->nodes[start_node]->public_address,
				     ctdb->nodes[candidate]->public_netmask_bits)) {
			ctdb->nodes[start_node]->takeover_vnn = nodemap->nodes[candidate].vnn;
			return;
		}
		candidate = (candidate+1)%nodemap->num;
	}
}
/*
make any IP alias changes for public addresses that are necessary

Runs in three passes over the nodemap:
1. decide, for every node's public IP, which vnn should hold it
(stored in ctdb->nodes[i]->takeover_vnn)
2. ask every connected node to release each IP it should not hold
3. ask each chosen owner to take over its IP

Returns 0 on success and -1 as soon as one release or takeover
control fails; later controls are then not sent.
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
int i, j;
int ret;
struct ctdb_public_ip ip;
ZERO_STRUCT(ip);
/* Work out which node will look after each public IP.
* A node that is neither inactive nor disabled keeps its own IP.
* For any other node a replacement is searched round-robin,
* starting with the node after it (see ctdb_takeover_find_node).
*/
for (i=0;i<nodemap->num;i++) {
if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn;
} else {
/* (uint32_t)-1 marks "no owner found yet" */
ctdb->nodes[i]->takeover_vnn = (uint32_t)-1;
ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED);
/* if no enabled node can take it, then we
might as well use any active node. It
probably means that some subsystem (such as
NFS) is sick on all nodes. Best we can do
is to keep the other services up. */
if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) {
ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE);
}
if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) {
DEBUG(0,(__location__ " No node available on same network to take %s\n",
ctdb->nodes[i]->public_address));
}
}
}
/* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */
/* now tell all nodes to delete any alias that they should not
have. This will be a NOOP on nodes that don't currently
hold the given alias */
for (i=0;i<nodemap->num;i++) {
/* don't talk to unconnected nodes, but do talk to banned nodes */
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
continue;
}
/* tell this node to delete all of the aliases that it should not have */
for (j=0;j<nodemap->num;j++) {
if (ctdb->nodes[j]->takeover_vnn != nodemap->nodes[i].vnn) {
ip.vnn = j;
ip.takeover_vnn = ctdb->nodes[j]->takeover_vnn;
ip.sin.sin_family = AF_INET;
inet_aton(ctdb->nodes[j]->public_address, &ip.sin.sin_addr);
ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
nodemap->nodes[i].vnn,
&ip);
if (ret != 0) {
DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
nodemap->nodes[i].vnn,
ctdb->nodes[j]->public_address));
return -1;
}
}
}
}
/* tell all nodes to get their own IPs */
for (i=0;i<nodemap->num;i++) {
if (ctdb->nodes[i]->takeover_vnn == -1) {
/* this IP won't be taken over */
continue;
}
ip.vnn = i;
ip.takeover_vnn = ctdb->nodes[i]->takeover_vnn;
ip.sin.sin_family = AF_INET;
inet_aton(ctdb->nodes[i]->public_address, &ip.sin.sin_addr);
/* the control goes to the chosen owner, not to node i itself */
ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(),
ctdb->nodes[i]->takeover_vnn,
&ip);
if (ret != 0) {
DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
ctdb->nodes[i]->takeover_vnn,
ctdb->nodes[i]->public_address));
return -1;
}
}
return 0;
}
/*
destroy a ctdb_client_ip structure
*/
static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
{
DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
return 0;
}
/*
  called by a client to inform us of a TCP connection that it is managing
  that should tickled with an ACK when IP takeover is done

  Registers the destination address against the client (so the client
  can be killed when that IP is released), records the connection on
  the client's tcp_list and broadcasts it to all connected nodes with
  CTDB_CONTROL_TCP_ADD.

  Returns 0 on success, -1 if the client is unknown, on allocation
  failure or if the broadcast could not be sent.
*/
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, uint32_t vnn,
				TDB_DATA indata)
{
	struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
	struct ctdb_tcp_list *tcp;
	struct ctdb_control_tcp_vnn t;
	int ret;
	TDB_DATA data;
	struct ctdb_client_ip *ip;

	if (client == NULL) {
		/* the lookup can fail; without a client there is nothing to
		   hang the records on and client->tcp_list must not be touched */
		DEBUG(0,(__location__ " Unknown client_id %u\n", client_id));
		return -1;
	}

	ip = talloc(client, struct ctdb_client_ip);
	CTDB_NO_MEMORY(ctdb, ip);

	ip->ctdb = ctdb;
	ip->ip = p->dest;
	ip->client_id = client_id;
	/* the destructor unlinks the entry when the client goes away */
	talloc_set_destructor(ip, ctdb_client_ip_destructor);
	DLIST_ADD(ctdb->client_ip_list, ip);

	tcp = talloc(client, struct ctdb_tcp_list);
	CTDB_NO_MEMORY(ctdb, tcp);

	tcp->vnn = vnn;
	tcp->saddr = p->src;
	tcp->daddr = p->dest;

	DLIST_ADD(client->tcp_list, tcp);

	t.vnn = vnn;
	t.src = p->src;
	t.dest = p->dest;

	data.dptr = (uint8_t *)&t;
	data.dsize = sizeof(t);

	/* tell all nodes about this tcp connection */
	ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0,
				       CTDB_CONTROL_TCP_ADD,
				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
	if (ret != 0) {
		DEBUG(0,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
		return -1;
	}

	return 0;
}
/*
  compare two sockaddr_in by address family, port and IPv4 address;
  returns true only when all three match
*/
static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2)
{
	if (in1->sin_family != in2->sin_family) {
		return false;
	}
	if (in1->sin_port != in2->sin_port) {
		return false;
	}
	if (in1->sin_addr.s_addr != in2->sin_addr.s_addr) {
		return false;
	}
	return true;
}
/*
  search a tcp list for an entry with the same source and destination
  endpoints as 'tcp' (the vnn is not compared). Returns the matching
  list element or NULL.
*/
static struct ctdb_tcp_list *ctdb_tcp_find(struct ctdb_tcp_list *list,
					   struct ctdb_tcp_list *tcp)
{
	struct ctdb_tcp_list *cur;

	for (cur = list; cur != NULL; cur = cur->next) {
		if (!same_sockaddr_in(&cur->saddr, &tcp->saddr)) {
			continue;
		}
		if (same_sockaddr_in(&cur->daddr, &tcp->daddr)) {
			return cur;
		}
	}

	return NULL;
}
/*
  called by a daemon to inform us of a TCP connection that one of its
  clients managing that should tickled with an ACK when IP takeover is
  done

  The connection is added to ctdb->tcp_list unless an entry with the
  same endpoints is already present. Returns 0 (CTDB_NO_MEMORY
  handles allocation failure).
*/
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
	struct ctdb_tcp_list *tcp;

	tcp = talloc(ctdb, struct ctdb_tcp_list);
	CTDB_NO_MEMORY(ctdb, tcp);

	tcp->vnn = p->vnn;
	tcp->saddr = p->src;
	tcp->daddr = p->dest;

	if (NULL == ctdb_tcp_find(ctdb->tcp_list, tcp)) {
		DLIST_ADD(ctdb->tcp_list, tcp);
		DEBUG(2,("Added tickle info for %s:%u from vnn %u\n",
			 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
			 tcp->vnn));
	} else {
		DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n",
			 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
			 tcp->vnn));
		/* the duplicate never goes on the list, so nothing else
		   would ever free it */
		talloc_free(tcp);
	}

	return 0;
}
/*
  called by a daemon to tell us that a TCP connection it reported
  earlier has gone away: drop the entry with matching endpoints from
  ctdb->tcp_list, if there is one. Always returns 0.
*/
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_tcp_vnn *req = (struct ctdb_control_tcp_vnn *)indata.dptr;
	struct ctdb_tcp_list probe;
	struct ctdb_tcp_list *found;

	/* build a lookup key from the request */
	probe.vnn = req->vnn;
	probe.saddr = req->src;
	probe.daddr = req->dest;

	found = ctdb_tcp_find(ctdb->tcp_list, &probe);
	if (found == NULL) {
		return 0;
	}

	DEBUG(2,("Removed tickle info for %s:%u from vnn %u\n",
		 inet_ntoa(found->daddr.sin_addr), ntohs(found->daddr.sin_port),
		 found->vnn));
	DLIST_REMOVE(ctdb->tcp_list, found);
	talloc_free(found);

	return 0;
}
/*
  called when a daemon restarts - wipes all tcp entries from that vnn

  Every remaining entry that belongs to our own vnn is then sent to
  the restarted node with CTDB_CONTROL_TCP_ADD, so it learns about
  the connections we hold. Always returns 0.
*/
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	struct ctdb_tcp_list *tcp, *next;

	for (tcp=ctdb->tcp_list;tcp;tcp=next) {
		/* fetch the successor first: tcp may be freed below */
		next = tcp->next;

		if (tcp->vnn == vnn) {
			DLIST_REMOVE(ctdb->tcp_list, tcp);
			talloc_free(tcp);
			/* tcp is gone - it must not be looked at again */
			continue;
		}

		/* and tell the new guy about any that he should have
		   from us */
		if (tcp->vnn == ctdb->vnn) {
			struct ctdb_control_tcp_vnn t;
			TDB_DATA data;

			t.vnn = tcp->vnn;
			t.src = tcp->saddr;
			t.dest = tcp->daddr;

			data.dptr = (uint8_t *)&t;
			data.dsize = sizeof(t);

			ctdb_daemon_send_control(ctdb, vnn, 0,
						 CTDB_CONTROL_TCP_ADD,
						 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
		}
	}
	return 0;
}
/*
called when a client structure goes away - hook to remove
elements from the tcp_list in all daemons
*/
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
TDB_DATA data;
struct ctdb_control_tcp_vnn p;
struct ctdb_tcp_list *tcp = client->tcp_list;
DLIST_REMOVE(client->tcp_list, tcp);
p.vnn = tcp->vnn;
p.src = tcp->saddr;
p.dest = tcp->daddr;
data.dptr = (uint8_t *)&p;
data.dsize = sizeof(p);
ctdb_daemon_send_control(client->ctdb, CTDB_BROADCAST_CONNECTED, 0,
CTDB_CONTROL_TCP_REMOVE,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
talloc_free(tcp);
}
}
/*
  release all IPs on shutdown

  Does nothing unless takeover is enabled. For every node whose
  public address is currently held by this host, the "releaseip"
  event script is run and, if the address parses, the clients
  registered on it are killed.
*/
void ctdb_release_all_ips(struct ctdb_context *ctdb)
{
	int idx;

	if (!ctdb->takeover.enabled) {
		return;
	}

	for (idx = 0; idx < ctdb->num_nodes; idx++) {
		struct ctdb_node *cur = ctdb->nodes[idx];
		struct in_addr addr;

		if (!ctdb_sys_have_ip(cur->public_address)) {
			continue;
		}

		ctdb_event_script(ctdb, "releaseip %s %s %u",
				  ctdb->takeover.interface,
				  cur->public_address,
				  cur->public_netmask_bits);

		if (inet_aton(cur->public_address, &addr) == 0) {
			/* unparsable address: no clients to look up */
			continue;
		}
		release_kill_clients(ctdb, addr);
	}
}
/*
  get list of public IPs

  Fills outdata with a zero-initialised ctdb_all_public_ips holding
  one entry per node: the node index, its current takeover_vnn and,
  when the node has a public address, that address. Returns 0.
*/
int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata)
{
	struct ctdb_all_public_ips *reply;
	int idx, nbytes;

	nbytes = offsetof(struct ctdb_all_public_ips, ips) + ctdb->num_nodes*sizeof(struct ctdb_public_ip);
	reply = talloc_zero_size(outdata, nbytes);
	CTDB_NO_MEMORY(ctdb, reply);

	outdata->dsize = nbytes;
	outdata->dptr = (uint8_t *)reply;

	reply->num = ctdb->num_nodes;
	for (idx = 0; idx < ctdb->num_nodes; idx++) {
		reply->ips[idx].vnn = idx;
		reply->ips[idx].takeover_vnn = ctdb->nodes[idx]->takeover_vnn;
		reply->ips[idx].sin.sin_family = AF_INET;

		if (!ctdb->nodes[idx]->public_address) {
			/* no address configured: leave it zeroed */
			continue;
		}
		inet_aton(ctdb->nodes[idx]->public_address, &reply->ips[idx].sin.sin_addr);
	}

	return 0;
}

View File

@ -1,462 +0,0 @@
/*
efficient async ctdb traverse
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../tdb/include/tdb.h"
#include "../include/ctdb_private.h"
/* per-record traverse callback; the end of a traverse is signalled by
   calling it with tdb_null for both key and data */
typedef void (*ctdb_traverse_fn_t)(void *private_data, TDB_DATA key, TDB_DATA data);
/*
handle returned to caller - freeing this handler will kill the child and
terminate the traverse
*/
struct ctdb_traverse_local_handle {
struct ctdb_db_context *ctdb_db;
/* pipe from the child: the child writes to fd[1], the parent reads fd[0] */
int fd[2];
/* pid of the forked child running the traverse */
pid_t child;
/* opaque argument passed back to callback */
void *private_data;
ctdb_traverse_fn_t callback;
/* set with timeval_current() once the traverse is set up */
struct timeval start_time;
/* packet queue reading the records from fd[0] */
struct ctdb_queue *queue;
};
/*
called when data is available from the child
*/
static void ctdb_traverse_local_handler(uint8_t *rawdata, size_t length, void *private_data)
{
struct ctdb_traverse_local_handle *h = talloc_get_type(private_data,
struct ctdb_traverse_local_handle);
TDB_DATA key, data;
ctdb_traverse_fn_t callback = h->callback;
void *p = h->private_data;
struct ctdb_rec_data *tdata = (struct ctdb_rec_data *)rawdata;
if (rawdata == NULL || length < 4 || length != tdata->length) {
/* end of traverse */
talloc_free(h);
callback(p, tdb_null, tdb_null);
return;
}
key.dsize = tdata->keylen;
key.dptr = &tdata->data[0];
data.dsize = tdata->datalen;
data.dptr = &tdata->data[tdata->keylen];
callback(p, key, data);
}
/*
  talloc destructor for an in-flight local traverse: kill the child
  with SIGKILL and reap it. Always returns 0.
*/
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
{
	pid_t child_pid = h->child;

	kill(child_pid, SIGKILL);
	waitpid(child_pid, NULL, 0);

	return 0;
}
/*
  callback from tdb_traverse_read(), run in the child

  Records that carry no payload beyond the ltdb header, or whose
  dmaster is not this node, are skipped. Every other record is
  marshalled and written to the pipe. Returns 0 to continue and -1 on
  a marshalling or write failure.
*/
static int ctdb_traverse_local_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
{
	struct ctdb_traverse_local_handle *handle =
		talloc_get_type(p, struct ctdb_traverse_local_handle);
	struct ctdb_ltdb_header *ltdb_hdr = (struct ctdb_ltdb_header *)data.dptr;
	struct ctdb_rec_data *rec;

	/* filter out non-authoritative and zero-length records */
	if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
		return 0;
	}
	if (ltdb_hdr->dmaster != handle->ctdb_db->ctdb->vnn) {
		return 0;
	}

	rec = ctdb_marshall_record(handle, 0, key, data);
	if (rec == NULL) {
		/* error handling is tricky in this child code .... */
		return -1;
	}

	/* anything but a complete write is treated as failure */
	if (write(handle->fd[1], (uint8_t *)rec, rec->length) != rec->length) {
		return -1;
	}

	return 0;
}
/*
  setup a non-blocking traverse of a local ltdb. The callback function
  will be called on every record in the local ltdb. To stop the
  traverse, talloc_free() the traverse handle.

  The traverse is finished when the callback is called with tdb_null for key and data

  A child process is forked to walk the tdb; it writes the records to
  a pipe which the parent reads through a packet queue. Returns the
  handle, or NULL if the handle, the pipe, the child or the queue
  could not be set up.
*/
static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_context *ctdb_db,
							      ctdb_traverse_fn_t callback,
							      void *private_data)
{
	struct ctdb_traverse_local_handle *h;
	int ret;

	h = talloc_zero(ctdb_db, struct ctdb_traverse_local_handle);
	if (h == NULL) {
		return NULL;
	}

	ret = pipe(h->fd);

	if (ret != 0) {
		talloc_free(h);
		return NULL;
	}

	h->child = fork();

	if (h->child == (pid_t)-1) {
		close(h->fd[0]);
		close(h->fd[1]);
		talloc_free(h);
		return NULL;
	}

	h->callback = callback;
	h->private_data = private_data;
	h->ctdb_db = ctdb_db;

	if (h->child == 0) {
		/* start the traverse in the child */
		close(h->fd[0]);
		tdb_traverse_read(ctdb_db->ltdb->tdb, ctdb_traverse_local_fn, h);
		_exit(0);
	}

	/* parent: only the read end is needed from here on */
	close(h->fd[1]);
	/* from now on freeing h kills and reaps the child */
	talloc_set_destructor(h, traverse_local_destructor);

	/*
	  setup a packet queue between the child and the parent. This
	  copes with all the async and packet boundary issues
	*/
	h->queue = ctdb_queue_setup(ctdb_db->ctdb, h, h->fd[0], 0, ctdb_traverse_local_handler, h);
	if (h->queue == NULL) {
		/* no queue took over the read end, so close it here rather
		   than leaking the descriptor */
		int read_fd = h->fd[0];

		talloc_free(h);
		close(read_fd);
		return NULL;
	}

	h->start_time = timeval_current();

	return h;
}
/*
  handle for a cluster-wide traverse started by ctdb_daemon_traverse_all()
*/
struct ctdb_traverse_all_handle {
struct ctdb_context *ctdb;
/* request id under which incoming traverse data finds this handle */
uint32_t reqid;
ctdb_traverse_fn_t callback;
void *private_data;
/* number of end-of-traverse (null key and data) records seen so far */
uint32_t null_count;
};
/*
  talloc destructor for a traverse_all handle: give back the request
  id that was allocated for it. Always returns 0.
*/
static int ctdb_traverse_all_destructor(struct ctdb_traverse_all_handle *state)
{
	struct ctdb_context *ctdb = state->ctdb;

	ctdb_reqid_remove(ctdb, state->reqid);

	return 0;
}
/*
  payload of a CTDB_CONTROL_TRAVERSE_ALL control
*/
struct ctdb_traverse_all {
/* database to traverse */
uint32_t db_id;
/* request id of the originator's traverse handle */
uint32_t reqid;
/* vnn of the originator; the records are sent back to this node */
uint32_t vnn;
};
/*
  timed-event handler, called when a traverse times out: count the
  timeout, signal the end of the traverse to the callback (tdb_null
  key and data) and free the handle
*/
static void ctdb_traverse_all_timeout(struct event_context *ev, struct timed_event *te,
				      struct timeval t, void *private_data)
{
	struct ctdb_traverse_all_handle *handle;

	handle = talloc_get_type(private_data, struct ctdb_traverse_all_handle);

	handle->ctdb->statistics.timeouts.traverse++;

	handle->callback(handle->private_data, tdb_null, tdb_null);
	talloc_free(handle);
}
/*
  setup a cluster-wide non-blocking traverse of a ctdb

  A CTDB_CONTROL_TRAVERSE_ALL control is broadcast to the nodes in
  the vnn map, asking them to send their records to this node; the
  callback is invoked for the records that arrive. To stop the
  traverse, talloc_free() the returned handle. The traverse is
  finished when the callback is called with tdb_null for key and
  data, which also happens when the traverse_timeout tunable expires.

  Returns the handle, or NULL on allocation or send failure.
*/
static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_context *ctdb_db,
								 ctdb_traverse_fn_t callback,
								 void *private_data)
{
	struct ctdb_context *ctdb = ctdb_db->ctdb;
	struct ctdb_traverse_all_handle *handle;
	struct ctdb_traverse_all request;
	TDB_DATA payload;

	handle = talloc(ctdb_db, struct ctdb_traverse_all_handle);
	if (handle == NULL) {
		return NULL;
	}

	handle->ctdb = ctdb;
	handle->reqid = ctdb_reqid_new(ctdb_db->ctdb, handle);
	handle->callback = callback;
	handle->private_data = private_data;
	handle->null_count = 0;

	/* gives the reqid back whenever the handle is freed */
	talloc_set_destructor(handle, ctdb_traverse_all_destructor);

	request.db_id = ctdb_db->db_id;
	request.reqid = handle->reqid;
	request.vnn = ctdb->vnn;

	payload.dptr = (uint8_t *)&request;
	payload.dsize = sizeof(request);

	/* tell all the nodes in the cluster to start sending records to this node */
	if (ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
				     CTDB_CONTROL_TRAVERSE_ALL,
				     0, CTDB_CTRL_FLAG_NOREPLY, payload, NULL, NULL) != 0) {
		talloc_free(handle);
		return NULL;
	}

	/* timeout the traverse */
	event_add_timed(ctdb->ev, handle,
			timeval_current_ofs(ctdb->tunable.traverse_timeout, 0),
			ctdb_traverse_all_timeout, handle);

	return handle;
}
/*
  per-request state on a node serving a CTDB_CONTROL_TRAVERSE_ALL
*/
struct traverse_all_state {
struct ctdb_context *ctdb;
/* the local traverse feeding traverse_all_callback() */
struct ctdb_traverse_local_handle *h;
/* request id of the originator, copied into every record sent back */
uint32_t reqid;
/* vnn of the originator; destination of the TRAVERSE_DATA controls */
uint32_t srcnode;
};
/*
  called for each record during a traverse all

  The record is marshalled with the originator's reqid and sent to
  the originating node as a CTDB_CONTROL_TRAVERSE_DATA control. The
  terminating record (empty key and data) is forwarded too, after
  which the state is freed.
*/
static void traverse_all_callback(void *p, TDB_DATA key, TDB_DATA data)
{
	struct traverse_all_state *st = talloc_get_type(p, struct traverse_all_state);
	struct ctdb_rec_data *rec;
	TDB_DATA payload;
	int rc;

	rec = ctdb_marshall_record(st, st->reqid, key, data);
	if (rec == NULL) {
		/* darn .... */
		DEBUG(0,("Out of memory in traverse_all_callback\n"));
		return;
	}

	payload.dptr = (uint8_t *)rec;
	payload.dsize = rec->length;

	rc = ctdb_daemon_send_control(st->ctdb, st->srcnode, 0, CTDB_CONTROL_TRAVERSE_DATA,
				      0, CTDB_CTRL_FLAG_NOREPLY, payload, NULL, NULL);
	if (rc != 0) {
		DEBUG(0,("Failed to send traverse data\n"));
	}

	if (key.dsize != 0 || data.dsize != 0) {
		/* ordinary record, more to come */
		return;
	}

	/* we're done */
	talloc_free(st);
}
/*
  called when a CTDB_CONTROL_TRAVERSE_ALL control comes in. We then
  setup a traverse of our local ltdb, sending the records as
  CTDB_CONTROL_TRAVERSE_DATA records back to the originator

  Returns 0 once the local traverse has been started, -1 on a
  malformed request, an unknown database or a setup failure.
*/
int32_t ctdb_control_traverse_all(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
{
	struct ctdb_traverse_all *req = (struct ctdb_traverse_all *)data.dptr;
	struct ctdb_db_context *db;
	struct traverse_all_state *st;

	if (data.dsize != sizeof(struct ctdb_traverse_all)) {
		DEBUG(0,("Invalid size in ctdb_control_traverse_all\n"));
		return -1;
	}

	db = find_ctdb_db(ctdb, req->db_id);
	if (db == NULL) {
		return -1;
	}

	st = talloc(db, struct traverse_all_state);
	if (st == NULL) {
		return -1;
	}

	st->reqid = req->reqid;
	st->srcnode = req->vnn;
	st->ctdb = ctdb;

	st->h = ctdb_traverse_local(db, traverse_all_callback, st);
	if (st->h == NULL) {
		talloc_free(st);
		return -1;
	}

	return 0;
}
/*
  called when a CTDB_CONTROL_TRAVERSE_DATA control comes in. We then
  call the traverse_all callback with the record

  The record is validated before use: it must be large enough to hold
  the record header, its length field must match the received size
  and key plus data must fit into what follows the header.

  A record with empty key and data is a node's end-of-traverse
  marker. Such markers are only counted until one has arrived for
  every active node; the last one is passed to the callback and the
  traverse handle is freed.

  Returns 0 on success, -1 for a malformed record or when no matching
  traverse is in progress.
*/
int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB_DATA *outdata)
{
	struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
	struct ctdb_traverse_all_handle *state;
	TDB_DATA key;
	ctdb_traverse_fn_t callback;
	void *private_data;
	const size_t hdr_len = offsetof(struct ctdb_rec_data, data);

	/* the whole header is read below, so the packet must contain all
	   of it before any field is looked at */
	if (data.dsize < sizeof(uint32_t) || data.dsize < hdr_len ||
	    data.dsize != d->length) {
		DEBUG(0,("Bad record size in ctdb_control_traverse_data\n"));
		return -1;
	}

	/* key and data are addressed inside the received buffer, so their
	   combined size must not exceed what follows the header */
	if ((uint64_t)d->keylen + (uint64_t)d->datalen > (uint64_t)(data.dsize - hdr_len)) {
		DEBUG(0,("Bad record size in ctdb_control_traverse_data\n"));
		return -1;
	}

	state = ctdb_reqid_find(ctdb, d->reqid, struct ctdb_traverse_all_handle);
	if (state == NULL || d->reqid != state->reqid) {
		/* traverse might have been terminated already */
		return -1;
	}

	key.dsize = d->keylen;
	key.dptr = &d->data[0];
	data.dsize = d->datalen;
	data.dptr = &d->data[d->keylen];

	if (key.dsize == 0 && data.dsize == 0) {
		state->null_count++;
		if (state->null_count != ctdb_get_num_active_nodes(ctdb)) {
			/* some nodes are still sending */
			return 0;
		}
	}

	/* taken before the callback runs, which may end the traverse */
	callback = state->callback;
	private_data = state->private_data;

	callback(private_data, key, data);
	if (key.dsize == 0 && data.dsize == 0) {
		/* we've received all of the null replies, so all
		   nodes are finished */
		talloc_free(state);
	}
	return 0;
}
/*
  state for a traverse started on behalf of a client
*/
struct traverse_start_state {
struct ctdb_context *ctdb;
/* the cluster-wide traverse feeding traverse_start_callback() */
struct ctdb_traverse_all_handle *h;
/* node the control came from */
uint32_t srcnode;
/* request id from the client's request, put into every record */
uint32_t reqid;
/* srvid under which the records are dispatched as messages */
uint64_t srvid;
};
/*
  callback which sends records as messages to the client

  Each record is marshalled with the client's reqid and dispatched
  under the client's srvid. After the terminating record (empty key
  and data) has been dispatched the state is freed.
*/
static void traverse_start_callback(void *p, TDB_DATA key, TDB_DATA data)
{
	struct traverse_start_state *st = talloc_get_type(p, struct traverse_start_state);
	struct ctdb_rec_data *rec;
	TDB_DATA payload;

	rec = ctdb_marshall_record(st, st->reqid, key, data);
	if (rec == NULL) {
		return;
	}

	payload.dptr = (uint8_t *)rec;
	payload.dsize = rec->length;

	ctdb_dispatch_message(st->ctdb, st->srvid, payload);

	if (key.dsize != 0 || data.dsize != 0) {
		return;
	}

	/* end of traverse */
	talloc_free(st);
}
/*
  start a traverse_all - called as a control from a client

  Validates the request, looks up the database and starts a
  cluster-wide traverse whose records are delivered to the client
  through traverse_start_callback(). Returns 0 on success, -1 on a
  malformed request, an unknown database or a setup failure.
*/
int32_t ctdb_control_traverse_start(struct ctdb_context *ctdb, TDB_DATA data,
				    TDB_DATA *outdata, uint32_t srcnode)
{
	struct ctdb_traverse_start *req = (struct ctdb_traverse_start *)data.dptr;
	struct ctdb_db_context *db;
	struct traverse_start_state *st;

	if (data.dsize != sizeof(*req)) {
		DEBUG(0,("Bad record size in ctdb_control_traverse_start\n"));
		return -1;
	}

	db = find_ctdb_db(ctdb, req->db_id);
	if (db == NULL) {
		return -1;
	}

	st = talloc(db, struct traverse_start_state);
	if (st == NULL) {
		return -1;
	}

	st->srcnode = srcnode;
	st->reqid = req->reqid;
	st->srvid = req->srvid;
	st->ctdb = ctdb;

	st->h = ctdb_daemon_traverse_all(db, traverse_start_callback, st);
	if (st->h == NULL) {
		talloc_free(st);
		return -1;
	}

	return 0;
}

View File

@ -1,163 +0,0 @@
/*
ctdb tunables code
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../include/ctdb_private.h"
/*
  table of tunables: the name used by the get/set/list tunable
  controls, the value installed by ctdb_tunables_set_defaults(), and
  the byte offset of the matching field inside struct ctdb_tunable
  (the accessors below read and write each field as a uint32_t)
*/
static const struct {
const char *name;
uint32_t default_v;
size_t offset;
} tunable_map[] = {
{ "MaxRedirectCount", 3, offsetof(struct ctdb_tunable, max_redirect_count) },
{ "SeqnumFrequency", 1, offsetof(struct ctdb_tunable, seqnum_frequency) },
{ "ControlTimeout", 60, offsetof(struct ctdb_tunable, control_timeout) },
{ "TraverseTimeout", 20, offsetof(struct ctdb_tunable, traverse_timeout) },
{ "KeepaliveInterval", 2, offsetof(struct ctdb_tunable, keepalive_interval) },
{ "KeepaliveLimit", 5, offsetof(struct ctdb_tunable, keepalive_limit) },
{ "MaxLACount", 7, offsetof(struct ctdb_tunable, max_lacount) },
{ "RecoverTimeout", 5, offsetof(struct ctdb_tunable, recover_timeout) },
{ "RecoverInterval", 1, offsetof(struct ctdb_tunable, recover_interval) },
{ "ElectionTimeout", 3, offsetof(struct ctdb_tunable, election_timeout) },
{ "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) },
{ "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) },
{ "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) },
{ "RecoveryGracePeriod", 60, offsetof(struct ctdb_tunable, recovery_grace_period) },
{ "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) },
{ "DatabaseHashSize", 10000, offsetof(struct ctdb_tunable, database_hash_size) },
{ "RerecoveryTimeout", 10, offsetof(struct ctdb_tunable, rerecovery_timeout) },
};
/*
  load the default value of every entry in tunable_map into
  ctdb->tunable
*/
void ctdb_tunables_set_defaults(struct ctdb_context *ctdb)
{
	uint8_t *base = (uint8_t *)&ctdb->tunable;
	int idx = 0;

	while (idx < ARRAY_SIZE(tunable_map)) {
		/* each tunable is addressed by its offset in the struct */
		uint32_t *field = (uint32_t *)(tunable_map[idx].offset + base);

		*field = tunable_map[idx].default_v;
		idx++;
	}
}
/*
  control handler: look up one tunable by name

  indata carries a struct ctdb_control_get_tunable whose name field is
  t->length bytes long. The name is matched case-insensitively against
  tunable_map. On success outdata holds a single uint32_t allocated on
  outdata and 0 is returned; a malformed request, an unknown name or an
  allocation failure gives -1.
*/
int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata,
				 TDB_DATA *outdata)
{
	const size_t name_ofs = offsetof(struct ctdb_control_get_tunable, name);
	struct ctdb_control_get_tunable *req;
	uint8_t *base = (uint8_t *)&ctdb->tunable;
	char *name;
	int idx;

	req = (struct ctdb_control_get_tunable *)indata.dptr;

	/* the name must lie completely inside the supplied buffer */
	if (indata.dsize < sizeof(*req) ||
	    req->length > indata.dsize - name_ofs) {
		DEBUG(0,("Bad indata in ctdb_control_get_tunable\n"));
		return -1;
	}

	/* make a terminated copy so it can be compared as a string */
	name = talloc_strndup(ctdb, (char*)req->name, req->length);
	CTDB_NO_MEMORY(ctdb, name);

	idx = 0;
	while (idx < ARRAY_SIZE(tunable_map) &&
	       strcasecmp(name, tunable_map[idx].name) != 0) {
		idx++;
	}
	talloc_free(name);

	if (idx == ARRAY_SIZE(tunable_map)) {
		/* no such tunable */
		return -1;
	}

	outdata->dptr = (uint8_t *)talloc(outdata, uint32_t);
	CTDB_NO_MEMORY(ctdb, outdata->dptr);

	*(uint32_t *)outdata->dptr = *(uint32_t *)(tunable_map[idx].offset + base);
	outdata->dsize = sizeof(uint32_t);

	return 0;
}
/*
  control handler: change one tunable by name

  indata carries a struct ctdb_control_set_tunable with the new value
  and a name of t->length bytes. The name is matched case-insensitively
  against tunable_map. Returns 0 after storing the value, -1 on a
  malformed request, an unknown name or an allocation failure.
*/
int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata)
{
	const size_t name_ofs = offsetof(struct ctdb_control_set_tunable, name);
	struct ctdb_control_set_tunable *req;
	uint8_t *base = (uint8_t *)&ctdb->tunable;
	char *name;
	int idx;

	req = (struct ctdb_control_set_tunable *)indata.dptr;

	/* the name must lie completely inside the supplied buffer */
	if (indata.dsize < sizeof(*req) ||
	    req->length > indata.dsize - name_ofs) {
		DEBUG(0,("Bad indata in ctdb_control_set_tunable\n"));
		return -1;
	}

	/* make a terminated copy so it can be compared as a string */
	name = talloc_strndup(ctdb, (char *)req->name, req->length);
	CTDB_NO_MEMORY(ctdb, name);

	idx = 0;
	while (idx < ARRAY_SIZE(tunable_map) &&
	       strcasecmp(name, tunable_map[idx].name) != 0) {
		idx++;
	}
	talloc_free(name);

	if (idx == ARRAY_SIZE(tunable_map)) {
		/* no such tunable */
		return -1;
	}

	*(uint32_t *)(tunable_map[idx].offset + base) = req->value;

	return 0;
}
/*
  control handler: return the names of all tunables

  The names from tunable_map are joined with ':' and returned in
  outdata as a struct ctdb_control_list_tunable whose length field
  counts the string including its terminating NUL. Returns 0 on
  success and -1 when an allocation fails.
*/
int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata)
{
	struct ctdb_control_list_tunable *t;
	char *list = NULL;
	size_t list_len;
	int idx;

	/* build "name0:name1:...:nameN" as a temporary string */
	list = talloc_strdup(outdata, tunable_map[0].name);
	CTDB_NO_MEMORY(ctdb, list);

	idx = 1;
	while (idx < ARRAY_SIZE(tunable_map)) {
		list = talloc_asprintf_append(list, ":%s", tunable_map[idx].name);
		CTDB_NO_MEMORY(ctdb, list);
		idx++;
	}

	/* the reply carries the string together with its terminator */
	list_len = strlen(list) + 1;

	outdata->dsize = offsetof(struct ctdb_control_list_tunable, data) +
		list_len;
	outdata->dptr = talloc_size(outdata, outdata->dsize);
	CTDB_NO_MEMORY(ctdb, outdata->dptr);

	t = (struct ctdb_control_list_tunable *)outdata->dptr;
	t->length = list_len;
	memcpy(t->data, list, t->length);

	talloc_free(list);

	return 0;
}

View File

@ -1,229 +0,0 @@
/*
standalone ctdb daemon
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "system/wait.h"
#include "cmdline.h"
#include "../include/ctdb_private.h"
static void block_signal(int signum)
{
struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = SIG_IGN;
sigemptyset(&act.sa_mask);
sigaddset(&act.sa_mask, signum);
sigaction(signum, &act, NULL);
}
/*
  daemon settings filled in from the command line by main(); the
  initializer supplies the built-in defaults, and members without an
  initializer start out as NULL
*/
static struct {
const char *nlist; /* --nlist: node list file */
const char *transport; /* --transport: protocol transport */
const char *myaddress; /* --listen: address to listen on */
const char *public_address_list; /* --public-addresses: public address list file */
const char *public_interface; /* --public-interface: public interface */
const char *event_script; /* --event-script: event script */
const char *logfile; /* --logfile: log file location */
const char *recovery_lock_file; /* --reclock: recovery lock file (main() requires it) */
const char *db_dir; /* --dbdir: directory for the tdb files */
} options = {
.nlist = ETCDIR "/ctdb/nodes",
.transport = "tcp",
.event_script = ETCDIR "/ctdb/events",
.logfile = VARDIR "/log/log.ctdb",
.db_dir = VARDIR "/ctdb",
};
/*
  upcall from the transport layer for every packet that arrives

  Counts the packet in the statistics, bumps the rx counter of the
  sending node (used to tell that the node is alive) and passes the
  packet on to ctdb_input_pkt().
*/
static void ctdb_recv_pkt(struct ctdb_context *ctdb, uint8_t *data, uint32_t length)
{
	struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
	int redirected_call;

	ctdb->statistics.node_packets_recv++;

	if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
		/* a call that has already been redirected (hopcount != 0)
		   is the one case that does not count towards rx_cnt */
		redirected_call = (hdr->operation == CTDB_REQ_CALL &&
				   ((struct ctdb_req_call *)hdr)->hopcount != 0);
		if (!redirected_call) {
			ctdb->nodes[hdr->srcnode]->rx_cnt++;
		}
	}

	ctdb_input_pkt(ctdb, hdr);
}
/*
  upcall table installed as ctdb->upcalls by main(): packet reception
  goes through ctdb_recv_pkt() above, the node_dead/node_connected
  entries point at ctdb_node_dead()/ctdb_node_connected()
*/
static const struct ctdb_upcalls ctdb_upcalls = {
.recv_pkt = ctdb_recv_pkt,
.node_dead = ctdb_node_dead,
.node_connected = ctdb_node_connected
};
/*
  main program

  Parses the command line into the static options structure, creates
  the event context and the ctdb context, applies every configured
  setting (recovery lock, transport, listen address, node list, tdb
  directory, log file, public addresses, event script) and finally
  hands over to ctdb_start_daemon(). Any setup failure terminates the
  process with exit status 1.
*/
int main(int argc, const char *argv[])
{
	struct ctdb_context *ctdb;
	int interactive = 0;

	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "interactive", 'i', POPT_ARG_NONE, &interactive, 0, "don't fork", NULL },
		{ "public-addresses", 0, POPT_ARG_STRING, &options.public_address_list, 0, "public address list file", "filename" },
		{ "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"},
		{ "event-script", 0, POPT_ARG_STRING, &options.event_script, 0, "event script", "filename" },
		{ "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" },
		{ "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" },
		{ "listen", 0, POPT_ARG_STRING, &options.myaddress, 0, "address to listen on", "address" },
		{ "transport", 0, POPT_ARG_STRING, &options.transport, 0, "protocol transport", NULL },
		{ "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
		POPT_TABLEEND
	};
	int opt, ret;
	const char **extra_argv;
	int extra_argc = 0;
	poptContext pc;
	struct event_context *ev;

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	/* all options are stored through the table; anything that makes
	   poptGetNextOpt() return a value here is an error */
	while ((opt = poptGetNextOpt(pc)) != -1) {
		switch (opt) {
		default:
			fprintf(stderr, "Invalid option %s: %s\n",
				poptBadOption(pc, 0), poptStrerror(opt));
			exit(1);
		}
	}

	/* setup the remaining options for the main program to use */
	extra_argv = poptGetArgs(pc);
	if (extra_argv) {
		extra_argv++;
		while (extra_argv[extra_argc]) extra_argc++;
	}

	if (!options.recovery_lock_file) {
		DEBUG(0,("You must specify the location of a recovery lock file with --reclock\n"));
		exit(1);
	}

	/* SIGPIPE is ignored for the rest of the process lifetime */
	block_signal(SIGPIPE);

	ev = s4_event_context_init(NULL);
	if (ev == NULL) {
		fprintf(stderr, "Failed to create event context\n");
		exit(1);
	}

	ctdb = ctdb_cmdline_init(ev);
	if (ctdb == NULL) {
		fprintf(stderr, "Failed to initialise ctdb context\n");
		exit(1);
	}

	ctdb->recovery_mode    = CTDB_RECOVERY_NORMAL;
	ctdb->recovery_master  = (uint32_t)-1;
	ctdb->upcalls          = &ctdb_upcalls;
	ctdb->idr              = idr_init(ctdb);
	ctdb->recovery_lock_fd = -1;
	ctdb->monitoring_mode  = CTDB_MONITORING_ACTIVE;

	ctdb_tunables_set_defaults(ctdb);

	ret = ctdb_set_recovery_lock_file(ctdb, options.recovery_lock_file);
	if (ret == -1) {
		printf("ctdb_set_recovery_lock_file failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	ret = ctdb_set_transport(ctdb, options.transport);
	if (ret == -1) {
		printf("ctdb_set_transport failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	/* tell ctdb what address to listen on */
	if (options.myaddress) {
		ret = ctdb_set_address(ctdb, options.myaddress);
		if (ret == -1) {
			printf("ctdb_set_address failed - %s\n", ctdb_errstr(ctdb));
			exit(1);
		}
	}

	/* tell ctdb what nodes are available */
	ret = ctdb_set_nlist(ctdb, options.nlist);
	if (ret == -1) {
		printf("ctdb_set_nlist failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	if (options.db_dir) {
		ret = ctdb_set_tdb_dir(ctdb, options.db_dir);
		if (ret == -1) {
			printf("ctdb_set_tdb_dir failed - %s\n", ctdb_errstr(ctdb));
			exit(1);
		}
	}

	ret = ctdb_set_logfile(ctdb, options.logfile);
	if (ret == -1) {
		printf("ctdb_set_logfile to %s failed - %s\n", options.logfile, ctdb_errstr(ctdb));
		exit(1);
	}

	if (options.public_interface) {
		ctdb->takeover.interface = talloc_strdup(ctdb, options.public_interface);
		CTDB_NO_MEMORY(ctdb, ctdb->takeover.interface);
	}

	/* IP takeover is only enabled when a public address list is given */
	if (options.public_address_list) {
		ret = ctdb_set_public_addresses(ctdb, options.public_address_list);
		if (ret == -1) {
			printf("Unable to setup public address list\n");
			exit(1);
		}
		ctdb->takeover.enabled = true;
	}

	ret = ctdb_set_event_script(ctdb, options.event_script);
	if (ret == -1) {
		printf("Unable to setup event script\n");
		exit(1);
	}

	/* useful default logfile */
	if (ctdb->logfile == NULL) {
		char *name = talloc_asprintf(ctdb, "%s/log.ctdb.vnn%u",
					     VARDIR, ctdb->vnn);
		CTDB_NO_MEMORY(ctdb, name);
		ctdb_set_logfile(ctdb, name);
		talloc_free(name);
	}

	/* start the protocol running (as a child) */
	return ctdb_start_daemon(ctdb, interactive?False:True);
}

View File

@ -1,191 +0,0 @@
/*
event script handling
Copyright (C) Andrew Tridgell 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/events/events.h"
/*
  run the event script - varargs version

  fmt/ap are formatted into the argument string that is appended to
  ctdb->takeover.event_script; the resulting command line is run
  synchronously with system().

  Returns 0 when no event script exists at the configured path,
  otherwise the exit status of the script. -1 is returned when the
  command could not be run, when the script did not terminate through a
  normal exit (e.g. it was killed by a signal), or when memory runs out.
*/
static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_list ap)
{
	char *options, *cmdstr;
	int ret;
	va_list ap2;
	struct stat st;

	/* a missing script is not treated as an error */
	if (stat(ctdb->takeover.event_script, &st) != 0 &&
	    errno == ENOENT) {
		DEBUG(0,("No event script found at '%s'\n", ctdb->takeover.event_script));
		return 0;
	}

	/* format from a copy so the caller's va_list stays untouched */
	va_copy(ap2, ap);
	options = talloc_vasprintf(ctdb, fmt, ap2);
	va_end(ap2);
	CTDB_NO_MEMORY(ctdb, options);

	cmdstr = talloc_asprintf(ctdb, "%s %s", ctdb->takeover.event_script, options);
	if (cmdstr == NULL) {
		/* don't leave the argument string behind on ctdb */
		talloc_free(options);
		CTDB_NO_MEMORY(ctdb, cmdstr);
	}

	ret = system(cmdstr);
	if (ret != -1) {
		/* WEXITSTATUS() is only meaningful after a normal exit; a
		   script killed by a signal must not look like exit code 0 */
		ret = WIFEXITED(ret) ? WEXITSTATUS(ret) : -1;
	}

	talloc_free(cmdstr);
	talloc_free(options);

	return ret;
}
/*
  run the event script synchronously

  printf-style wrapper around ctdb_event_script_v(); the formatted
  arguments are passed to the script and its result is returned
  unchanged.
*/
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
{
	va_list args;
	int status;

	va_start(args, fmt);
	status = ctdb_event_script_v(ctdb, fmt, args);
	va_end(args);

	return status;
}
/*
  state of one event script run started in the background by
  ctdb_event_script_callback()
*/
struct ctdb_event_script_state {
struct ctdb_context *ctdb;
/* pid returned by fork() for the process running the script */
pid_t child;
/* completion callback, called as callback(ctdb, status, private_data) */
void (*callback)(struct ctdb_context *, int, void *);
/* pipe created with pipe(): the parent keeps fd[0], the child keeps fd[1] */
int fd[2];
/* opaque pointer handed back to callback */
void *private_data;
};
/*
  called when child is finished

  fd event handler for the read end of the pipe shared with the script
  child. Reaps the child, frees the state and reports the result to the
  stored callback: the exit code of the child, or -1 when it could not
  be reaped or did not terminate through a normal exit.
*/
static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
				      uint16_t flags, void *p)
{
	struct ctdb_event_script_state *state =
		talloc_get_type(p, struct ctdb_event_script_state);
	int status = -1;
	/* copy what is needed after the state has been freed */
	void (*callback)(struct ctdb_context *, int, void *) = state->callback;
	void *private_data = state->private_data;
	struct ctdb_context *ctdb = state->ctdb;

	if (waitpid(state->child, &status, 0) == state->child &&
	    WIFEXITED(status)) {
		status = WEXITSTATUS(status);
	} else {
		/* WEXITSTATUS() is meaningless unless the child exited
		   normally; report failure instead of a bogus 0 */
		status = -1;
	}

	/* the child is already reaped: drop the destructor so it does
	   not try to kill and wait for it again */
	talloc_set_destructor(state, NULL);
	talloc_free(state);

	callback(ctdb, status, private_data);
}
/*
  timed event handler: the event script did not finish in time

  Frees the state (which runs whatever destructor is attached to it)
  and then reports -1 to the stored callback.
*/
static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
				      struct timeval t, void *p)
{
	struct ctdb_event_script_state *state;
	struct ctdb_context *ctdb;
	void (*done_fn)(struct ctdb_context *, int, void *);
	void *done_arg;

	state = talloc_get_type(p, struct ctdb_event_script_state);

	/* remember what the callback needs before the state goes away */
	ctdb     = state->ctdb;
	done_fn  = state->callback;
	done_arg = state->private_data;

	DEBUG(0,("event script timed out\n"));
	talloc_free(state);

	done_fn(ctdb, -1, done_arg);
}
/*
destroy a running event script

talloc destructor for struct ctdb_event_script_state: kills the child
process with SIGKILL and then waits for it so that it is reaped.
Always returns 0.
*/
static int event_script_destructor(struct ctdb_event_script_state *state)
{
kill(state->child, SIGKILL);
/* blocking wait; the exit status is not needed */
waitpid(state->child, NULL, 0);
return 0;
}
/*
run the event script in the background, calling the callback when
finished

A child process is forked to run the script through
ctdb_event_script_v(); the parent watches the read end of a pipe whose
write end is held by the child. callback(ctdb, status, private_data)
is invoked from ctdb_event_script_handler() or, when a non-zero timeout
expires first, from ctdb_event_script_timeout() with status -1.

The state is allocated on mem_ctx. Returns 0 once the child has been
started, -1 when the allocation, pipe() or fork() fails.
*/
int ctdb_event_script_callback(struct ctdb_context *ctdb,
struct timeval timeout,
TALLOC_CTX *mem_ctx,
void (*callback)(struct ctdb_context *, int, void *),
void *private_data,
const char *fmt, ...)
{
struct ctdb_event_script_state *state;
va_list ap;
int ret;
state = talloc(mem_ctx, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->callback = callback;
state->private_data = private_data;
ret = pipe(state->fd);
if (ret != 0) {
talloc_free(state);
return -1;
}
state->child = fork();
if (state->child == (pid_t)-1) {
/* fork failed: close both pipe ends before giving up */
close(state->fd[0]);
close(state->fd[1]);
talloc_free(state);
return -1;
}
if (state->child == 0) {
/* child: keeps only the write end of the pipe, runs the script
   and exits with the script's result */
close(state->fd[0]);
ctdb_set_realtime(false);
set_close_on_exec(state->fd[1]);
va_start(ap, fmt);
ret = ctdb_event_script_v(ctdb, fmt, ap);
va_end(ap);
_exit(ret);
}
/* parent: from here on, freeing state kills and reaps the child */
talloc_set_destructor(state, event_script_destructor);
close(state->fd[1]);
/* both events are allocated on state, so they go away with it */
event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
ctdb_event_script_handler, state);
/* a zero timeout means no timeout event is set up */
if (!timeval_is_zero(&timeout)) {
event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
}
return 0;
}

View File

@ -1,583 +0,0 @@
/*
ctdb recovery code
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
/* timeout passed to the release/takeover controls sent by
   ctdb_takeover_run(); presumably 5 seconds from now - confirm against
   timeval_current_ofs() */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(5,0)
/* first argument of timeval_current_ofs() when ctdb_control_send_arp()
   re-arms itself */
#define CTDB_ARP_INTERVAL 1
/* number of times ctdb_control_send_arp() runs for one takeover */
#define CTDB_ARP_REPEAT 3
/*
  state of the repeated gratuitous arp / tcp tickle sending that follows
  an IP takeover
*/
struct ctdb_takeover_arp {
struct ctdb_context *ctdb;
/* how many times the arp has been sent so far */
uint32_t count;
/* the address that was taken over */
struct sockaddr_in sin;
/* private copies of the tcp connections to send tickle acks for */
struct ctdb_tcp_list *tcp_list;
};
/*
lists of tcp endpoints

one entry describes a single tcp connection; entries are chained with
the DLIST_* macros through prev/next
*/
struct ctdb_tcp_list {
struct ctdb_tcp_list *prev, *next;
/* vnn recorded for this connection when it was registered */
uint32_t vnn;
/* the two endpoints of the connection; ctdb_control_takeover_ip() and
   ctdb_control_release_ip() match daddr against the public IP */
struct sockaddr_in saddr;
struct sockaddr_in daddr;
};
/*
send a gratuitous arp
*/
static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data)
{
struct ctdb_takeover_arp *arp = talloc_get_type(private_data,
struct ctdb_takeover_arp);
int ret;
struct ctdb_tcp_list *tcp;
ret = ctdb_sys_send_arp(&arp->sin, arp->ctdb->takeover.interface);
if (ret != 0) {
DEBUG(0,(__location__ "sending of arp failed (%s)\n", strerror(errno)));
}
for (tcp=arp->tcp_list;tcp;tcp=tcp->next) {
DEBUG(2,("sending tcp tickle ack for %u->%s:%u\n",
(unsigned)ntohs(tcp->daddr.sin_port),
inet_ntoa(tcp->saddr.sin_addr),
(unsigned)ntohs(tcp->saddr.sin_port)));
ret = ctdb_sys_send_ack(&tcp->saddr, &tcp->daddr);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to send tcp tickle ack for %s\n",
inet_ntoa(tcp->saddr.sin_addr)));
}
}
arp->count++;
if (arp->count == CTDB_ARP_REPEAT) {
talloc_free(arp);
return;
}
event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
timeval_current_ofs(CTDB_ARP_INTERVAL, 0),
ctdb_control_send_arp, arp);
}
/*
  take over an ip address

  indata holds the struct sockaddr_in of the public address to take.
  Nothing is done when ctdb_sys_have_ip() reports that this node already
  has the address. Otherwise the event script is run with "takeip" and
  a series of gratuitous arps / tcp tickle acks is scheduled through
  ctdb_control_send_arp().

  Returns 0 on success (or when there was nothing to do) and -1 when
  the event script fails or memory runs out.
*/
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, TDB_DATA indata)
{
	int ret;
	struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
	struct ctdb_takeover_arp *arp;
	/* note: inet_ntoa() returns a pointer to a static buffer */
	char *ip = inet_ntoa(sin->sin_addr);
	struct ctdb_tcp_list *tcp;

	if (ctdb_sys_have_ip(ip)) {
		return 0;
	}

	DEBUG(0,("Takeover of IP %s/%u on interface %s\n",
		 ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
		 ctdb->takeover.interface));
	ret = ctdb_event_script(ctdb, "takeip %s %s %u",
				ctdb->takeover.interface,
				ip,
				ctdb->nodes[ctdb->vnn]->public_netmask_bits);
	if (ret != 0) {
		DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n",
			 ip, ctdb->takeover.interface));
		return -1;
	}

	/* last_ctx owns all pending arp work; ctdb_control_release_ip()
	   frees it to cancel that work */
	if (!ctdb->takeover.last_ctx) {
		ctdb->takeover.last_ctx = talloc_new(ctdb);
		CTDB_NO_MEMORY(ctdb, ctdb->takeover.last_ctx);
	}

	arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp);
	CTDB_NO_MEMORY(ctdb, arp);

	arp->ctdb = ctdb;
	arp->sin = *sin;

	/* add all of the known tcp connections for this IP to the
	   list of tcp connections to send tickle acks for */
	for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
		if (sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
			struct ctdb_tcp_list *t2 = talloc(arp, struct ctdb_tcp_list);
			if (t2 == NULL) {
				/* no event refers to arp yet, so drop the
				   half-built state instead of leaving it
				   on last_ctx */
				talloc_free(arp);
				CTDB_NO_MEMORY(ctdb, t2);
			}
			*t2 = *tcp;
			DLIST_ADD(arp->tcp_list, t2);
		}
	}

	/* fire the first arp immediately; the handler re-arms itself */
	event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx,
			timeval_zero(), ctdb_control_send_arp, arp);

	/* the event script succeeded if we got here */
	return 0;
}
/*
release an ip address

indata holds the struct sockaddr_in of the public address to give up.
Nothing is done when ctdb_sys_have_ip() reports that this node does not
have the address. Otherwise pending arp work is cancelled, the event
script is run with "releaseip", local clients are notified and the
other nodes are told about the tcp connections this node held on that
address.

Returns 0 on success (or when there was nothing to do) and -1 when the
event script fails.
*/
int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, TDB_DATA indata)
{
struct sockaddr_in *sin = (struct sockaddr_in *)indata.dptr;
TDB_DATA data;
/* inet_ntoa() returns a pointer to a static buffer, so ip is only valid
   until the next inet_ntoa() call */
char *ip = inet_ntoa(sin->sin_addr);
int ret;
struct ctdb_tcp_list *tcp;
if (!ctdb_sys_have_ip(ip)) {
return 0;
}
DEBUG(0,("Release of IP %s/%u on interface %s\n",
ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits,
ctdb->takeover.interface));
/* stop any previous arps */
talloc_free(ctdb->takeover.last_ctx);
ctdb->takeover.last_ctx = NULL;
ret = ctdb_event_script(ctdb, "releaseip %s %s %u",
ctdb->takeover.interface,
ip,
ctdb->nodes[ctdb->vnn]->public_netmask_bits);
if (ret != 0) {
DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n",
ip, ctdb->takeover.interface));
return -1;
}
/* send a message to all clients of this node telling them
that the cluster has been reconfigured and they should
release any sockets on this IP */
/* the message payload is the dotted-quad string including its NUL */
data.dptr = (uint8_t *)ip;
data.dsize = strlen(ip)+1;
ctdb_daemon_send_message(ctdb, ctdb->vnn, CTDB_SRVID_RELEASE_IP, data);
/* tell other nodes about any tcp connections we were holding with this IP */
for (tcp=ctdb->tcp_list;tcp;tcp=tcp->next) {
if (tcp->vnn == ctdb->vnn &&
sin->sin_addr.s_addr == tcp->daddr.sin_addr.s_addr) {
struct ctdb_control_tcp_vnn t;
t.vnn = ctdb->vnn;
t.src = tcp->saddr;
t.dest = tcp->daddr;
data.dptr = (uint8_t *)&t;
data.dsize = sizeof(t);
/* sent with CTDB_CTRL_FLAG_NOREPLY; the return value is not checked */
ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
CTDB_CONTROL_TCP_ADD,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
}
}
return 0;
}
/*
setup the event script

Stores a copy of the script path (allocated on ctdb) in
ctdb->takeover.event_script. Returns 0 on success; the CTDB_NO_MEMORY()
check handles a failed allocation.
*/
int ctdb_set_event_script(struct ctdb_context *ctdb, const char *script)
{
ctdb->takeover.event_script = talloc_strdup(ctdb, script);
CTDB_NO_MEMORY(ctdb, ctdb->takeover.event_script);
return 0;
}
/*
  setup the public address list from a file

  alist names a file with one "address/netmask-bits" entry per line,
  line i belonging to node i; trailing empty lines are ignored. The
  number of entries must equal ctdb->num_nodes. For every node the
  address, the netmask length and an unset takeover_vnn (-1) are
  stored.

  Returns 0 on success and -1 when the file cannot be loaded, the line
  count does not match, an entry is malformed or memory runs out. The
  loaded lines are released on every return path.
*/
int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
{
	char **lines;
	int nlines;
	int i;

	lines = file_lines_load(alist, &nlines, ctdb);
	if (lines == NULL) {
		ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
		return -1;
	}
	while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
		nlines--;
	}

	if (nlines != ctdb->num_nodes) {
		DEBUG(0,("Number of lines in %s does not match number of nodes!\n", alist));
		goto failed;
	}

	for (i=0;i<nlines;i++) {
		char *p;
		struct in_addr in;

		ctdb->nodes[i]->public_address = talloc_strdup(ctdb->nodes[i], lines[i]);
		if (ctdb->nodes[i]->public_address == NULL) {
			/* free the lines first, the macro returns */
			talloc_free(lines);
			CTDB_NO_MEMORY(ctdb, ctdb->nodes[i]->public_address);
		}
		ctdb->nodes[i]->takeover_vnn = -1;

		/* see if they supplied a netmask length */
		p = strchr(ctdb->nodes[i]->public_address, '/');
		if (!p) {
			DEBUG(0,("You must supply a netmask for public address %s\n",
				 ctdb->nodes[i]->public_address));
			goto failed;
		}
		/* cut the string at the '/' so only the address remains */
		*p = 0;
		ctdb->nodes[i]->public_netmask_bits = atoi(p+1);

		if (ctdb->nodes[i]->public_netmask_bits > 32) {
			DEBUG(0, ("Illegal netmask for IP %s\n", ctdb->nodes[i]->public_address));
			goto failed;
		}

		if (inet_aton(ctdb->nodes[i]->public_address, &in) == 0) {
			DEBUG(0,("Badly formed IP '%s' in public address list\n", ctdb->nodes[i]->public_address));
			goto failed;
		}
	}

	talloc_free(lines);
	return 0;

failed:
	talloc_free(lines);
	return -1;
}
/*
  see if two IPs are on the same subnet

  ip1 and ip2 are dotted-quad IPv4 strings and netmask_bits is the
  prefix length (0-32) used for the comparison. Returns true when both
  addresses parse and agree in their first netmask_bits bits. A NULL or
  unparsable address, or a prefix length above 32, gives false.
*/
static bool ctdb_same_subnet(const char *ip1, const char *ip2, uint8_t netmask_bits)
{
	struct in_addr in1, in2;
	uint32_t mask;

	/* a prefix longer than 32 would make the shift count negative */
	if (netmask_bits > 32) {
		return false;
	}

	/* never compare addresses that failed to parse */
	if (ip1 == NULL || ip2 == NULL ||
	    inet_aton(ip1, &in1) == 0 ||
	    inet_aton(ip2, &in2) == 0) {
		return false;
	}

	/* 64 bit arithmetic keeps the /0 case (a shift by 32) defined */
	mask = ~((1LL<<(32-netmask_bits))-1);

	return (ntohl(in1.s_addr) & mask) == (ntohl(in2.s_addr) & mask);
}
/*
  make any IP alias changes for public addresses that are necessary

  Works in three passes over the node map:
   1. decide which vnn should host each node's public address: the node
      itself when it is connected, otherwise the next connected node
      (searching upwards, wrapping around) on the same subnet, or -1
      when there is none
   2. ask every connected node to release the addresses that were not
      assigned to it
   3. ask each assigned vnn to take over its address(es)

  Returns 0 on success and -1 as soon as one of the controls fails.
*/
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
	int i, j;
	int ret;

	/* work out which node will look after each public IP */
	for (i=0;i<nodemap->num;i++) {
		if (nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED) {
			ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn;
		} else {
			/* assign this dead nodes IP to the next higher node */
			for (j=(i+1)%nodemap->num;
			     j != i;
			     j=(j+1)%nodemap->num) {
				if ((nodemap->nodes[j].flags & NODE_FLAGS_CONNECTED) &&
				    ctdb_same_subnet(ctdb->nodes[j]->public_address,
						     ctdb->nodes[i]->public_address,
						     ctdb->nodes[j]->public_netmask_bits)) {
					ctdb->nodes[i]->takeover_vnn = nodemap->nodes[j].vnn;
					break;
				}
			}
			/* the search came back round to i: nobody can help */
			if (j == i) {
				DEBUG(0,(__location__ " No node available on same network to take %s\n",
					 ctdb->nodes[i]->public_address));
				ctdb->nodes[i]->takeover_vnn = -1;
			}
		}
	}

	/* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */

	/* now tell all nodes to delete any alias that they should not
	   have. This will be a NOOP on nodes that don't currently
	   hold the given alias */
	for (i=0;i<nodemap->num;i++) {
		/* don't talk to unconnected nodes */
		if (!(nodemap->nodes[i].flags & NODE_FLAGS_CONNECTED)) continue;

		/* tell this node to delete all of the aliases that it should not have */
		for (j=0;j<nodemap->num;j++) {
			if (ctdb->nodes[j]->takeover_vnn != nodemap->nodes[i].vnn) {
				ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(),
							   nodemap->nodes[i].vnn,
							   ctdb->nodes[j]->public_address);
				if (ret != 0) {
					DEBUG(0,("Failed to tell vnn %u to release IP %s\n",
						 nodemap->nodes[i].vnn,
						 ctdb->nodes[j]->public_address));
					return -1;
				}
			}
		}
	}

	/* tell all nodes to get their own IPs */
	for (i=0;i<nodemap->num;i++) {
		/* no node could be found for this IP in the first pass, so
		   there is nobody to send the takeover to; -1 is not a
		   valid destination vnn */
		if ((uint32_t)ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) {
			continue;
		}
		ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(),
					    ctdb->nodes[i]->takeover_vnn,
					    ctdb->nodes[i]->public_address);
		if (ret != 0) {
			DEBUG(0,("Failed asking vnn %u to take over IP %s\n",
				 ctdb->nodes[i]->takeover_vnn,
				 ctdb->nodes[i]->public_address));
			return -1;
		}
	}

	return 0;
}
/*
  called by a client to inform us of a TCP connection that it is managing
  that should be tickled with an ACK when IP takeover is done

  client_id identifies the calling client, vnn is recorded with the
  connection and indata holds a struct ctdb_control_tcp with the two
  endpoints. The connection is remembered on the client's own tcp_list
  and announced to all nodes with CTDB_CONTROL_TCP_ADD.

  Returns 0 on success and -1 when the client is unknown, memory runs
  out or the broadcast cannot be sent.
*/
int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, uint32_t vnn,
				TDB_DATA indata)
{
	struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
	struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr;
	struct ctdb_tcp_list *tcp;
	struct ctdb_control_tcp_vnn t;
	int ret;
	TDB_DATA data;

	/* the entry is allocated on, and linked into, the client
	   structure, so a failed lookup must not be dereferenced */
	if (client == NULL) {
		DEBUG(0,(__location__ " Unknown client id %u in ctdb_control_tcp_client\n",
			 (unsigned)client_id));
		return -1;
	}

	tcp = talloc(client, struct ctdb_tcp_list);
	CTDB_NO_MEMORY(ctdb, tcp);

	tcp->vnn   = vnn;
	tcp->saddr = p->src;
	tcp->daddr = p->dest;

	DLIST_ADD(client->tcp_list, tcp);

	t.vnn  = vnn;
	t.src  = p->src;
	t.dest = p->dest;

	data.dptr = (uint8_t *)&t;
	data.dsize = sizeof(t);

	/* tell all nodes about this tcp connection */
	ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0,
				       CTDB_CONTROL_TCP_ADD,
				       0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
	if (ret != 0) {
		DEBUG(0,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
		return -1;
	}

	return 0;
}
/*
  compare two IPv4 socket addresses: true when address family, port and
  address all match
*/
static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2)
{
	if (in1->sin_family != in2->sin_family) {
		return false;
	}
	if (in1->sin_port != in2->sin_port) {
		return false;
	}
	return in1->sin_addr.s_addr == in2->sin_addr.s_addr;
}
/*
  look up a tcp connection on a list

  Returns the first entry of list whose source and destination
  addresses both equal those of tcp, or NULL when there is none. The
  vnn field takes no part in the comparison.
*/
static struct ctdb_tcp_list *ctdb_tcp_find(struct ctdb_tcp_list *list,
					   struct ctdb_tcp_list *tcp)
{
	struct ctdb_tcp_list *cur;

	for (cur = list; cur != NULL; cur = cur->next) {
		if (!same_sockaddr_in(&cur->saddr, &tcp->saddr)) {
			continue;
		}
		if (same_sockaddr_in(&cur->daddr, &tcp->daddr)) {
			return cur;
		}
	}

	return NULL;
}
/*
  called by a daemon to inform us of a TCP connection that one of its
  clients is managing and that should be tickled with an ACK when IP
  takeover is done

  indata holds a struct ctdb_control_tcp_vnn. The connection is added
  to ctdb->tcp_list unless an entry with the same endpoints is already
  there. Returns 0, or -1 when memory runs out.
*/
int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
	struct ctdb_tcp_list *tcp;

	tcp = talloc(ctdb, struct ctdb_tcp_list);
	CTDB_NO_MEMORY(ctdb, tcp);

	tcp->vnn   = p->vnn;
	tcp->saddr = p->src;
	tcp->daddr = p->dest;

	if (NULL == ctdb_tcp_find(ctdb->tcp_list, tcp)) {
		DLIST_ADD(ctdb->tcp_list, tcp);
		DEBUG(2,("Added tickle info for %s:%u from vnn %u\n",
			 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
			 tcp->vnn));
	} else {
		DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n",
			 inet_ntoa(tcp->daddr.sin_addr), ntohs(tcp->daddr.sin_port),
			 tcp->vnn));
		/* the connection is already on the list; release the
		   candidate entry instead of leaking it on ctdb */
		talloc_free(tcp);
	}

	return 0;
}
/*
  called by a daemon to tell us to forget a TCP connection

  indata holds a struct ctdb_control_tcp_vnn. The entry of
  ctdb->tcp_list with the same endpoints, if there is one, is unlinked
  and freed. Always returns 0.
*/
int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
{
	struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
	struct ctdb_tcp_list key;
	struct ctdb_tcp_list *found;

	/* build a search key on the stack */
	key.vnn   = p->vnn;
	key.saddr = p->src;
	key.daddr = p->dest;

	found = ctdb_tcp_find(ctdb->tcp_list, &key);
	if (found == NULL) {
		/* nothing recorded for this connection */
		return 0;
	}

	DEBUG(2,("Removed tickle info for %s:%u from vnn %u\n",
		 inet_ntoa(found->daddr.sin_addr), ntohs(found->daddr.sin_port),
		 found->vnn));
	DLIST_REMOVE(ctdb->tcp_list, found);
	talloc_free(found);

	return 0;
}
/*
  called when a daemon restarts - wipes all tcp entries from that vnn

  Every entry of ctdb->tcp_list recorded for vnn is removed. Entries
  that belong to this node (ctdb->vnn) are re-sent to the restarted
  node with CTDB_CONTROL_TCP_ADD so that it learns about them again.
  Always returns 0.
*/
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	struct ctdb_tcp_list *tcp, *next;

	for (tcp=ctdb->tcp_list;tcp;tcp=next) {
		/* fetch the successor first, tcp may be freed below */
		next = tcp->next;

		if (tcp->vnn == vnn) {
			DLIST_REMOVE(ctdb->tcp_list, tcp);
			talloc_free(tcp);
			/* tcp is gone and must not be looked at again */
			continue;
		}

		/* and tell the new guy about any that he should have
		   from us */
		if (tcp->vnn == ctdb->vnn) {
			struct ctdb_control_tcp_vnn t;
			TDB_DATA data;

			t.vnn  = tcp->vnn;
			t.src  = tcp->saddr;
			t.dest = tcp->daddr;

			data.dptr = (uint8_t *)&t;
			data.dsize = sizeof(t);

			ctdb_daemon_send_control(ctdb, vnn, 0,
						 CTDB_CONTROL_TCP_ADD,
						 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
		}
	}
	return 0;
}
/*
called when a client structure goes away - hook to remove
elements from the tcp_list in all daemons
*/
void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
{
while (client->tcp_list) {
TDB_DATA data;
struct ctdb_control_tcp_vnn p;
struct ctdb_tcp_list *tcp = client->tcp_list;
DLIST_REMOVE(client->tcp_list, tcp);
p.vnn = tcp->vnn;
p.src = tcp->saddr;
p.dest = tcp->daddr;
data.dptr = (uint8_t *)&p;
data.dsize = sizeof(p);
ctdb_daemon_send_control(client->ctdb, CTDB_BROADCAST_VNNMAP, 0,
CTDB_CONTROL_TCP_REMOVE,
0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
talloc_free(tcp);
}
}
/*
  release all IPs on shutdown

  Does nothing unless IP takeover is enabled. Otherwise the event
  script is run with "releaseip" for every node's public address that
  ctdb_sys_have_ip() reports as present on this host.
*/
void ctdb_release_all_ips(struct ctdb_context *ctdb)
{
	int idx;

	if (!ctdb->takeover.enabled) {
		return;
	}

	for (idx = 0; idx < ctdb->num_nodes; idx++) {
		struct ctdb_node *node = ctdb->nodes[idx];

		if (!ctdb_sys_have_ip(node->public_address)) {
			/* not one of ours right now */
			continue;
		}

		ctdb_event_script(ctdb, "releaseip %s %s %u",
				  ctdb->takeover.interface,
				  node->public_address,
				  node->public_netmask_bits);
	}
}

View File

@ -1,381 +0,0 @@
/*
ctdb recovery code
Copyright (C) Ronnie Sahlberg 2007
Copyright (C) Andrew Tridgell 2007
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/network.h"
#include "system/filesys.h"
#include "system/wait.h"
#include "../include/ctdb_private.h"
#include "lib/events/events.h"
#include <net/ethernet.h>
#include <net/if_arp.h>
/*
  send gratuitous arp reply after we have taken over an ip address

  saddr is the address we are trying to claim
  iface is the interface name we will be using to claim the address

  Two frames are broadcast on iface: an arp request and an unsolicited
  arp reply, both announcing saddr with the hardware address of iface.

  Returns 0 when both frames were sent and -1 on any failure (non-IPv4
  address, unusable interface name, socket/ioctl/sendto error). The
  socket is closed on every path.
*/
int ctdb_sys_send_arp(const struct sockaddr_in *saddr, const char *iface)
{
	int s, ret;
	struct sockaddr sa;
	struct ether_header *eh;
	struct arphdr *ah;
	struct ifreq if_hwaddr;
	unsigned char buffer[64]; /*minimum eth frame size */
	char *ptr;

	/* for now, we only handle AF_INET addresses */
	if (saddr->sin_family != AF_INET) {
		DEBUG(0,(__location__ " not an ipv4 address\n"));
		return -1;
	}

	/* the name is copied into the fixed size ifr_name below, so it
	   has to fit there together with its terminator */
	if (iface == NULL || strlen(iface) >= sizeof(if_hwaddr.ifr_name)) {
		DEBUG(0,(__location__ " invalid interface name\n"));
		return -1;
	}

	s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_ARP));
	if (s == -1){
		DEBUG(0,(__location__ " failed to open raw socket\n"));
		return -1;
	}

	/* get the mac address */
	memset(&if_hwaddr, 0, sizeof(if_hwaddr));
	strncpy(if_hwaddr.ifr_name, iface, sizeof(if_hwaddr.ifr_name)-1);
	ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr);
	if ( ret < 0 ) {
		close(s);
		DEBUG(0,(__location__ " ioctl failed\n"));
		return -1;
	}
	if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) {
		close(s);
		DEBUG(0,(__location__ " not an ethernet address\n"));
		return -1;
	}

	/* the destination of both frames is the named interface; start
	   from a zeroed sockaddr so no field is left uninitialised */
	memset(&sa, 0, sizeof(sa));
	strncpy(sa.sa_data, iface, sizeof(sa.sa_data)-1);

	/* ethernet header: broadcast destination, our mac as source */
	memset(buffer, 0 , 64);
	eh = (struct ether_header *)buffer;
	memset(eh->ether_dhost, 0xff, ETH_ALEN);
	memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
	eh->ether_type = htons(ETHERTYPE_ARP);

	/* arp header: ethernet hardware addresses, IPv4 protocol addresses */
	ah = (struct arphdr *)&buffer[sizeof(struct ether_header)];
	ah->ar_hrd = htons(ARPHRD_ETHER);
	ah->ar_pro = htons(ETH_P_IP);
	ah->ar_hln = ETH_ALEN;
	ah->ar_pln = 4;

	/* send a gratuitous arp: sender mac/ip, zeroed target mac,
	   target ip equal to the sender ip */
	ah->ar_op  = htons(ARPOP_REQUEST);
	ptr = (char *)&ah[1];
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);
	ptr+=4;
	memset(ptr, 0, ETH_ALEN);
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);
	ptr+=4;

	ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
	if (ret < 0 ){
		close(s);
		DEBUG(0,(__location__ " failed sendto\n"));
		return -1;
	}

	/* send unsolicited arp reply broadcast: same as above but with
	   our own mac in the target hardware address */
	ah->ar_op  = htons(ARPOP_REPLY);
	ptr = (char *)&ah[1];
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);
	ptr+=4;
	memcpy(ptr, if_hwaddr.ifr_hwaddr.sa_data, ETH_ALEN);
	ptr+=ETH_ALEN;
	memcpy(ptr, &saddr->sin_addr, 4);
	ptr+=4;

	ret = sendto(s, buffer, 64, 0, &sa, sizeof(sa));
	if (ret < 0 ){
		/* don't leak the socket on this path either */
		close(s);
		DEBUG(0,(__location__ " failed sendto\n"));
		return -1;
	}

	close(s);
	return 0;
}
/*
  uint16 checksum for n bytes

  Sums the n bytes at data as 16 bit words in network byte order and
  returns the unfolded 32 bit sum. An odd trailing byte is treated as
  the high-order byte of a final word whose low-order byte is zero,
  independent of the byte order of the host.
*/
static uint32_t uint16_checksum(uint16_t *data, size_t n)
{
	uint32_t sum=0;

	while (n>=2) {
		sum += (uint32_t)ntohs(*data);
		data++;
		n -= 2;
	}
	if (n == 1) {
		/* ntohs() on the single byte would only put it in the
		   high-order position on little-endian hosts; shift
		   explicitly so big-endian hosts agree */
		sum += ((uint32_t)*(uint8_t *)data) << 8;
	}
	return sum;
}
/*
  simple TCP checksum - assumes data is multiple of 2 bytes long

  data : the TCP segment to checksum
  n    : its length in bytes
  ip   : IP header supplying the source address, destination address
         and protocol that are added to the sum

  returns the complemented, folded sum; a result of zero is returned
  as 0xFFFF
 */
static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip)
{
	uint32_t acc;
	uint16_t result;
	int fold;

	/* segment first, then the address/protocol/length terms */
	acc  = uint16_checksum(data, n);
	acc += uint16_checksum((uint16_t *)&ip->saddr, sizeof(ip->saddr));
	acc += uint16_checksum((uint16_t *)&ip->daddr, sizeof(ip->daddr));
	acc += ip->protocol + n;

	/* fold the carries back into the low 16 bits, twice */
	for (fold = 0; fold < 2; fold++) {
		acc = (acc & 0xFFFF) + (acc >> 16);
	}

	result = (uint16_t)~htons(acc);
	return (result == 0) ? 0xFFFF : result;
}
/*
  send tcp ack packet from the specified IP/port to the specified
  destination IP/port.

  This is used to trigger the receiving host into sending its own ACK,
  which should trigger early detection of TCP reset by the client
  after IP takeover

  dest : address and port the packet is sent to
  src  : address and port placed in the packet as its source

  returns 0 on success, -1 on failure (non IPv4 address, or a failed
  socket(), setsockopt() or sendto() call)
 */
int ctdb_sys_send_ack(const struct sockaddr_in *dest,
		      const struct sockaddr_in *src)
{
	int s, ret;
	uint32_t one = 1;
	struct {
		struct iphdr ip;
		struct tcphdr tcp;
	} pkt;

	/* for now, we only handle AF_INET addresses */
	if (src->sin_family != AF_INET || dest->sin_family != AF_INET) {
		DEBUG(0,(__location__ " not an ipv4 address\n"));
		return -1;
	}

	/* the protocol argument of socket() is a plain protocol number
	   in host byte order, not a network order field */
	s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	if (s == -1) {
		DEBUG(0,(__location__ " failed to open raw socket (%s)\n",
			 strerror(errno)));
		return -1;
	}

	/* we supply the IP header ourselves */
	ret = setsockopt(s, SOL_IP, IP_HDRINCL, &one, sizeof(one));
	if (ret != 0) {
		DEBUG(0,(__location__ " failed to setup IP headers (%s)\n",
			 strerror(errno)));
		close(s);
		return -1;
	}

	ZERO_STRUCT(pkt);
	pkt.ip.version  = 4;
	pkt.ip.ihl      = sizeof(pkt.ip)/4;
	pkt.ip.tot_len  = htons(sizeof(pkt));
	pkt.ip.ttl      = 255;
	pkt.ip.protocol = IPPROTO_TCP;
	pkt.ip.saddr    = src->sin_addr.s_addr;
	pkt.ip.daddr    = dest->sin_addr.s_addr;
	pkt.ip.check    = 0;

	pkt.tcp.source   = src->sin_port;
	pkt.tcp.dest     = dest->sin_port;
	pkt.tcp.ack      = 1;
	pkt.tcp.doff     = sizeof(pkt.tcp)/4;
	pkt.tcp.window   = htons(1234);
	pkt.tcp.check    = tcp_checksum((uint16_t *)&pkt.tcp, sizeof(pkt.tcp), &pkt.ip);

	ret = sendto(s, &pkt, sizeof(pkt), 0,
		     (const struct sockaddr *)dest, sizeof(*dest));
	if (ret != sizeof(pkt)) {
		DEBUG(0,(__location__ " failed sendto (%s)\n", strerror(errno)));
		close(s);
		return -1;
	}

	close(s);
	return 0;
}
/*
  see if we currently have an interface with the given IP

  we try to bind to it, and if that fails then we don't have that IP
  on an interface

  ip : IPv4 address in dotted form, as accepted by inet_aton()

  returns true if a TCP socket could be bound to the address, false
  otherwise (including when ip is NULL or cannot be parsed)
 */
bool ctdb_sys_have_ip(const char *ip)
{
	struct sockaddr_in sin;
	int s;
	int ret;

	if (ip == NULL) {
		return false;
	}

	/* start from a fully zeroed address so that no uninitialised
	   bytes are handed to bind() */
	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port   = 0;
	if (inet_aton(ip, &sin.sin_addr) == 0) {
		/* not a valid address string - we can't have it */
		return false;
	}

	s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (s == -1) {
		return false;
	}
	ret = bind(s, (struct sockaddr *)&sin, sizeof(sin));
	close(s);
	return ret == 0;
}
/*
  run the event script - varargs version

  The command line is the configured event script path followed by the
  options produced from fmt/ap, and is run synchronously with system().

  returns 0 if no event script exists at the configured path, the
  script's exit status if it exited normally, and -1 if it could not be
  run or did not exit normally. Allocation failures are handled by
  CTDB_NO_MEMORY (presumably an early error return - defined elsewhere).
 */
static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_list ap)
{
	char *options, *cmdstr;
	int ret;
	va_list ap2;
	struct stat st;

	if (stat(ctdb->takeover.event_script, &st) != 0 &&
	    errno == ENOENT) {
		DEBUG(0,("No event script found at '%s'\n", ctdb->takeover.event_script));
		return 0;
	}

	/* work on a copy so the caller's va_list is left untouched */
	va_copy(ap2, ap);
	options  = talloc_vasprintf(ctdb, fmt, ap2);
	va_end(ap2);
	CTDB_NO_MEMORY(ctdb, options);

	cmdstr = talloc_asprintf(ctdb, "%s %s", ctdb->takeover.event_script, options);
	if (cmdstr == NULL) {
		/* options hangs off the ctdb context, so release it
		   before the out of memory handling below takes over */
		talloc_free(options);
	}
	CTDB_NO_MEMORY(ctdb, cmdstr);

	ret = system(cmdstr);
	if (ret != -1) {
		/* only a normal exit carries an exit status; a script
		   killed by a signal must not be reported as success */
		if (WIFEXITED(ret)) {
			ret = WEXITSTATUS(ret);
		} else {
			ret = -1;
		}
	}

	talloc_free(cmdstr);
	talloc_free(options);

	return ret;
}
/*
  run the event script

  Variadic front end: collects the arguments following fmt and passes
  them to ctdb_event_script_v(), whose result is returned unchanged.
 */
int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
{
	va_list args;
	int status;

	va_start(args, fmt);
	status = ctdb_event_script_v(ctdb, fmt, args);
	va_end(args);

	return status;
}
/*
  state for one background event script run started by
  ctdb_event_script_callback()
 */
struct ctdb_event_script_state {
/* context handed back to the callback as its first argument */
struct ctdb_context *ctdb;
/* pid returned by fork(); waited for in ctdb_event_script_handler() */
pid_t child;
/* invoked with the child's status once the child has finished */
void (*callback)(struct ctdb_context *, int);
/* pipe pair: the parent watches fd[0], the child keeps fd[1] */
int fd[2];
};
/*
  called when child is finished

  fd event handler for the read end of the script pipe. Reaps the child,
  passes its exit status (or -1 if no status was collected) to the
  stored callback and then frees the state.
 */
static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
				      uint16_t flags, void *p)
{
	struct ctdb_event_script_state *st;
	int wstatus = -1;
	int result;

	st = talloc_get_type(p, struct ctdb_event_script_state);

	waitpid(st->child, &wstatus, 0);
	result = (wstatus == -1) ? -1 : WEXITSTATUS(wstatus);

	st->callback(st->ctdb, result);
	talloc_free(st);
}
/*
run the event script in the background, calling the callback when
finished

A child process is forked to run the script through
ctdb_event_script_v() and exits with that function's result. The parent
watches the read end of a pipe shared with the child and runs
ctdb_event_script_handler() when it becomes readable.

returns 0 once the child has been started and -1 if pipe() or fork()
fails; a failed state allocation is handled by CTDB_NO_MEMORY
*/
int ctdb_event_script_callback(struct ctdb_context *ctdb,
void (*callback)(struct ctdb_context *, int),
const char *fmt, ...)
{
struct ctdb_event_script_state *state;
va_list ap;
int ret;
state = talloc(ctdb, struct ctdb_event_script_state);
CTDB_NO_MEMORY(ctdb, state);
state->ctdb = ctdb;
state->callback = callback;
ret = pipe(state->fd);
if (ret != 0) {
talloc_free(state);
return -1;
}
state->child = fork();
if (state->child == (pid_t)-1) {
/* fork failed: neither end of the pipe is needed any more */
close(state->fd[0]);
close(state->fd[1]);
talloc_free(state);
return -1;
}
if (state->child == 0) {
/* child: drop the read end, run the script synchronously and
   report its result as our exit status */
close(state->fd[0]);
va_start(ap, fmt);
ret = ctdb_event_script_v(ctdb, fmt, ap);
va_end(ap);
_exit(ret);
}
/* parent: keep only the read end. Nothing is written to the pipe in
   this function, so the read event presumably fires on end-of-file
   when the child exits - TODO confirm against the event library */
close(state->fd[1]);
event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
ctdb_event_script_handler, state);
return 0;
}

View File

@ -1,54 +0,0 @@
/*
ctdb database library
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/* ctdb_tcp main state */
struct ctdb_tcp {
/* listening socket, or -1 when no listening socket is open */
int listen_fd;
};
/*
state associated with an incoming connection
*/
struct ctdb_incoming {
/* ctdb context the connection was accepted for */
struct ctdb_context *ctdb;
/* the accepted socket */
int fd;
/* packet queue set up on fd */
struct ctdb_queue *queue;
};
/*
state associated with one tcp node
*/
struct ctdb_tcp_node {
/* outgoing socket to the node, or -1 when there is none */
int fd;
/* queue used to send packets to the node */
struct ctdb_queue *out_queue;
/* fd event waiting for a non-blocking connect to finish */
struct fd_event *connect_fde;
/* timed event that restarts the connect attempt */
struct timed_event *connect_te;
};
/* prototypes internal to tcp transport */
/* queue a packet for sending to the given node */
int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length);
/* open the listening socket for this ctdb instance */
int ctdb_tcp_listen(struct ctdb_context *ctdb);
/* timed event handler: try to establish a tcp connection to a node */
void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
struct timeval t, void *private_data);
/* queue callback for incoming connections */
void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args);
/* queue callback for the outgoing connection to a node */
void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data);
/* byte alignment used for tcp transport packet sizes */
#define CTDB_TCP_ALIGNMENT 8

View File

@ -1,370 +0,0 @@
/*
ctdb over TCP
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "ctdb_tcp.h"
/*
  called when a complete packet has come in - should not happen on this socket

  Queue callback for the outgoing connection to a node. A NULL data
  pointer is reported to the upper layer as a dead node. In every case
  the queue is detached from its socket and a new connect cycle is
  scheduled straight away.
 */
void ctdb_tcp_tnode_cb(uint8_t *data, size_t cnt, void *private_data)
{
	struct ctdb_node *peer;
	struct ctdb_tcp_node *link;

	peer = talloc_get_type(private_data, struct ctdb_node);
	link = talloc_get_type(peer->private_data, struct ctdb_tcp_node);

	if (data == NULL) {
		peer->ctdb->upcalls->node_dead(peer);
	}

	/* start a new connect cycle to try to re-establish the
	   link */
	ctdb_queue_set_fd(link->out_queue, -1);
	link->fd = -1;
	event_add_timed(peer->ctdb->ev, link, timeval_zero(),
			ctdb_tcp_node_connect, peer);
}
/*
  called when socket becomes writeable on connect

  fd event handler for a non-blocking connect. Cancels the connect
  timeout, then checks SO_ERROR on the socket. On failure the socket is
  closed and a new connect attempt is scheduled one second later. On
  success the socket is handed to the node's out queue and the upper
  layer is told that the node is connected.
 */
static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *fde,
				    uint16_t flags, void *private_data)
{
	struct ctdb_node *node = talloc_get_type(private_data,
						 struct ctdb_node);
	struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data,
						      struct ctdb_tcp_node);
	struct ctdb_context *ctdb = node->ctdb;
	int error = 0;
	socklen_t len = sizeof(error);
	int one = 1;

	talloc_free(tnode->connect_te);
	tnode->connect_te = NULL;

	if (getsockopt(tnode->fd, SOL_SOCKET, SO_ERROR, &error, &len) != 0 ||
	    error != 0) {
		talloc_free(fde);
		/* fde is the event stored in connect_fde; don't leave a
		   stale pointer for a later talloc_free() */
		tnode->connect_fde = NULL;
		close(tnode->fd);
		tnode->fd = -1;
		event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0),
				ctdb_tcp_node_connect, node);
		return;
	}

	talloc_free(fde);
	tnode->connect_fde = NULL;

	setsockopt(tnode->fd,IPPROTO_TCP,TCP_NODELAY,(char *)&one,sizeof(one));
	setsockopt(tnode->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one));

	ctdb_queue_set_fd(tnode->out_queue, tnode->fd);

	/* tell the ctdb layer we are connected */
	node->ctdb->upcalls->node_connected(node);
}
/*
  convert an address string to an IPv4 address

  address is first parsed as a dotted IPv4 address; if that fails it is
  looked up as a host name with gethostbyname().

  returns 0 with *addr filled in, or -1 (after recording an error on
  ctdb) when the lookup fails or yields an address larger than *addr
 */
static int ctdb_tcp_get_address(struct ctdb_context *ctdb,
				const char *address, struct in_addr *addr)
{
	struct hostent *host;

	if (inet_pton(AF_INET, address, addr) > 0) {
		return 0;
	}

	host = gethostbyname(address);
	if (host == NULL || host->h_length > sizeof(*addr)) {
		ctdb_set_error(ctdb, "invalid nework address '%s'\n",
			       address);
		return -1;
	}

	memcpy(addr, host->h_addr, host->h_length);
	return 0;
}
/*
  called when we should try and establish a tcp connection to a node

  Timed event handler. Any socket left over from a previous attempt is
  closed first. A new non-blocking socket is then bound to our own ctdb
  address and connected to the node's address and port. Completion is
  picked up by ctdb_node_connect_write(); the attempt is restarted if it
  has not completed within one second.

  If either address cannot be resolved the socket is closed and no
  retry is scheduled. If the socket cannot be created, or connect()
  fails outright, a retry is scheduled one second later.
 */
void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te,
			   struct timeval t, void *private_data)
{
	struct ctdb_node *node = talloc_get_type(private_data,
						 struct ctdb_node);
	struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data,
						      struct ctdb_tcp_node);
	struct ctdb_context *ctdb = node->ctdb;
	struct sockaddr_in sock_in;
	struct sockaddr_in sock_out;

	if (tnode->fd != -1) {
		talloc_free(tnode->connect_fde);
		tnode->connect_fde = NULL;
		close(tnode->fd);
		tnode->fd = -1;
	}

	tnode->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (tnode->fd == -1) {
		/* no socket to work with - try again once a second */
		event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0),
				ctdb_tcp_node_connect, node);
		return;
	}

	set_nonblocking(tnode->fd);
	set_close_on_exec(tnode->fd);

	ZERO_STRUCT(sock_out);
#ifdef HAVE_SOCK_SIN_LEN
	sock_out.sin_len = sizeof(sock_out);
#endif
	if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out.sin_addr) != 0) {
		/* don't leave the fresh socket open behind us */
		close(tnode->fd);
		tnode->fd = -1;
		return;
	}
	sock_out.sin_port = htons(node->address.port);
	sock_out.sin_family = PF_INET;

	/* Bind our side of the socketpair to the same address we use to listen
	 * on incoming CTDB traffic.
	 * We must specify this address to make sure that the address we expose to
	 * the remote side is actually routable in case CTDB traffic will run on
	 * a dedicated non-routeable network.
	 */
	ZERO_STRUCT(sock_in);
#ifdef HAVE_SOCK_SIN_LEN
	sock_in.sin_len = sizeof(sock_in);
#endif
	if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in.sin_addr) != 0) {
		close(tnode->fd);
		tnode->fd = -1;
		return;
	}
	sock_in.sin_port = htons(0); /* INPORT_ANY is not always available */
	sock_in.sin_family = PF_INET;
	bind(tnode->fd, (struct sockaddr *)&sock_in, sizeof(sock_in));

	if (connect(tnode->fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) != 0 &&
	    errno != EINPROGRESS) {
		/* try again once a second */
		close(tnode->fd);
		tnode->fd = -1;
		event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0),
				ctdb_tcp_node_connect, node);
		return;
	}

	/* non-blocking connect - wait for write event */
	tnode->connect_fde = event_add_fd(node->ctdb->ev, tnode, tnode->fd,
					  EVENT_FD_WRITE|EVENT_FD_READ,
					  ctdb_node_connect_write, node);

	/* don't give it long to connect - retry in one second. This ensures
	   that we find a node is up quickly (tcp normally backs off a syn reply
	   delay by quite a lot) */
	tnode->connect_te = event_add_timed(ctdb->ev, tnode, timeval_current_ofs(1, 0),
					    ctdb_tcp_node_connect, node);
}
/*
called when we get contacted by another node
currently makes no attempt to check if the connection is really from a ctdb
node in our cluster
*/
static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde,
uint16_t flags, void *private_data)
{
struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp);
struct sockaddr_in addr;
socklen_t len;
int fd, nodeid;
struct ctdb_incoming *in;
int one = 1;
const char *incoming_node;
memset(&addr, 0, sizeof(addr));
len = sizeof(addr);
fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len);
if (fd == -1) return;
incoming_node = inet_ntoa(addr.sin_addr);
for (nodeid=0;nodeid<ctdb->num_nodes;nodeid++) {
if (!strcmp(incoming_node, ctdb->nodes[nodeid]->address.address)) {
DEBUG(0, ("Incoming connection from node:%d %s\n",nodeid,incoming_node));
break;
}
}
if (nodeid>=ctdb->num_nodes) {
DEBUG(0, ("Refused connection from unknown node %s\n", incoming_node));
close(fd);
return;
}
in = talloc_zero(ctcp, struct ctdb_incoming);
in->fd = fd;
in->ctdb = ctdb;
set_nonblocking(in->fd);
set_close_on_exec(in->fd);
setsockopt(in->fd,SOL_SOCKET,SO_KEEPALIVE,(char *)&one,sizeof(one));
in->queue = ctdb_queue_setup(ctdb, in, in->fd, CTDB_TCP_ALIGNMENT,
ctdb_tcp_read_cb, in);
}
/*
automatically find which address to listen on

Tries to bind the already created listening socket to each configured
node address in turn and adopts the first one that succeeds as this
instance's address, name and vnn. The whole bind()+listen() sequence
runs under an fcntl() write lock on a lock file.

returns 0 when listening, -1 on failure (the listening socket is closed
and listen_fd reset to -1 once the bind loop has been reached)
*/
static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb)
{
struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data,
struct ctdb_tcp);
struct sockaddr_in sock;
int lock_fd, i;
const char *lock_path = "/tmp/.ctdb_socket_lock";
struct flock lock;
/* in order to ensure that we don't get two nodes with the
same address, we must make the bind() and listen() calls
atomic. The SO_REUSEADDR setsockopt only prevents double
binds if the first socket is in LISTEN state */
lock_fd = open(lock_path, O_RDWR|O_CREAT, 0666);
if (lock_fd == -1) {
DEBUG(0,("Unable to open %s\n", lock_path));
return -1;
}
/* write lock on the first byte of the lock file */
lock.l_type = F_WRLCK;
lock.l_whence = SEEK_SET;
lock.l_start = 0;
lock.l_len = 1;
lock.l_pid = 0;
/* F_SETLKW blocks until the lock can be taken */
if (fcntl(lock_fd, F_SETLKW, &lock) != 0) {
DEBUG(0,("Unable to lock %s\n", lock_path));
close(lock_fd);
return -1;
}
/* try each node address until one can be bound */
for (i=0;i<ctdb->num_nodes;i++) {
ZERO_STRUCT(sock);
#ifdef HAVE_SOCK_SIN_LEN
sock.sin_len = sizeof(sock);
#endif
sock.sin_port = htons(ctdb->nodes[i]->address.port);
sock.sin_family = PF_INET;
if (ctdb_tcp_get_address(ctdb, ctdb->nodes[i]->address.address,
&sock.sin_addr) != 0) {
continue;
}
if (bind(ctcp->listen_fd, (struct sockaddr * )&sock,
sizeof(sock)) == 0) {
break;
}
}
if (i == ctdb->num_nodes) {
DEBUG(0,("Unable to bind to any of the node addresses - giving up\n"));
goto failed;
}
/* adopt the identity of the node whose address we bound */
ctdb->address = ctdb->nodes[i]->address;
ctdb->name = talloc_asprintf(ctdb, "%s:%u",
ctdb->address.address,
ctdb->address.port);
ctdb->vnn = ctdb->nodes[i]->vnn;
ctdb->nodes[i]->flags &= ~NODE_FLAGS_DISCONNECTED;
DEBUG(1,("ctdb chose network address %s:%u vnn %u\n",
ctdb->address.address,
ctdb->address.port,
ctdb->vnn));
if (listen(ctcp->listen_fd, 10) == -1) {
goto failed;
}
event_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
ctdb_listen_event, ctdb);
/* closing the lock file descriptor also drops the lock */
close(lock_fd);
return 0;
failed:
close(lock_fd);
close(ctcp->listen_fd);
ctcp->listen_fd = -1;
return -1;
}
/*
  listen on our own address

  Creates the listening socket. When no address has been configured the
  choice is delegated to ctdb_tcp_listen_automatic(); otherwise the
  socket is bound to the configured address and port, put into listen
  state and registered with the event loop.

  returns 0 on success, -1 on failure (listen_fd is reset to -1 if the
  socket had to be closed)
 */
int ctdb_tcp_listen(struct ctdb_context *ctdb)
{
	struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data,
						struct ctdb_tcp);
	struct sockaddr_in bind_addr;
	int enable = 1;

	ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (ctcp->listen_fd == -1) {
		ctdb_set_error(ctdb, "socket failed\n");
		return -1;
	}

	set_close_on_exec(ctcp->listen_fd);
	setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&enable,sizeof(enable));

	/* we can either auto-bind to the first available address, or we can
	   use a specified address */
	if (ctdb->address.address == NULL) {
		return ctdb_tcp_listen_automatic(ctdb);
	}

	ZERO_STRUCT(bind_addr);
#ifdef HAVE_SOCK_SIN_LEN
	bind_addr.sin_len = sizeof(bind_addr);
#endif
	bind_addr.sin_port = htons(ctdb->address.port);
	bind_addr.sin_family = PF_INET;

	/* resolve, bind and listen in that order; the first step that
	   fails tears the socket down again */
	if (ctdb_tcp_get_address(ctdb, ctdb->address.address,
				 &bind_addr.sin_addr) != 0 ||
	    bind(ctcp->listen_fd, (struct sockaddr * )&bind_addr, sizeof(bind_addr)) != 0 ||
	    listen(ctcp->listen_fd, 10) == -1) {
		close(ctcp->listen_fd);
		ctcp->listen_fd = -1;
		return -1;
	}

	event_add_fd(ctdb->ev, ctcp, ctcp->listen_fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
		     ctdb_listen_event, ctdb);

	return 0;
}

View File

@ -1,140 +0,0 @@
/*
ctdb over TCP
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "../tdb/include/tdb.h"
#include "lib/events/events.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "ctdb_tcp.h"
/*
  initialise tcp portion of a ctdb node

  Allocates the per node tcp state with no socket (fd -1), attaches it
  to the node and sets up the node's outgoing packet queue.

  returns 0 on success; allocation failure is handled by CTDB_NO_MEMORY
 */
static int ctdb_tcp_add_node(struct ctdb_node *node)
{
	struct ctdb_tcp *transport;
	struct ctdb_tcp_node *tnode;

	transport = talloc_get_type(node->ctdb->private_data, struct ctdb_tcp);

	tnode = talloc_zero(transport, struct ctdb_tcp_node);
	CTDB_NO_MEMORY(node->ctdb, tnode);

	tnode->fd = -1;
	node->private_data = tnode;

	tnode->out_queue = ctdb_queue_setup(node->ctdb, transport, tnode->fd,
					    CTDB_TCP_ALIGNMENT,
					    ctdb_tcp_tnode_cb, node);
	return 0;
}
/*
  initialise transport structures

  Starts listening on our own address, then creates the tcp state for
  every configured node.

  returns 0 on success, -1 if listening or any node setup fails
 */
static int ctdb_tcp_initialise(struct ctdb_context *ctdb)
{
	int idx = 0;

	/* listen on our own address */
	if (ctdb_tcp_listen(ctdb) != 0) {
		return -1;
	}

	while (idx < ctdb->num_nodes) {
		if (ctdb_tcp_add_node(ctdb->nodes[idx]) != 0) {
			DEBUG(0, ("methods->add_node failed at %d\n", idx));
			return -1;
		}
		idx++;
	}

	return 0;
}
/*
  start the protocol going

  Schedules an immediate connect attempt to every node whose address
  differs from our own. Always returns 0.
 */
static int ctdb_tcp_start(struct ctdb_context *ctdb)
{
	int n;

	/* startup connections to the other servers - will happen on
	   next event loop */
	for (n = 0; n < ctdb->num_nodes; n++) {
		struct ctdb_node *node = ctdb->nodes[n];
		struct ctdb_tcp_node *tnode = talloc_get_type(
			node->private_data, struct ctdb_tcp_node);

		if (ctdb_same_address(&ctdb->address, &node->address)) {
			/* that's us */
			continue;
		}
		event_add_timed(ctdb->ev, tnode, timeval_zero(),
				ctdb_tcp_node_connect, node);
	}

	return 0;
}
/*
  shutdown the transport

  Frees the tcp transport state and clears the context's pointer to it.
 */
static void ctdb_tcp_shutdown(struct ctdb_context *ctdb)
{
	talloc_free(talloc_get_type(ctdb->private_data, struct ctdb_tcp));
	ctdb->private_data = NULL;
}
/*
  transport packet allocator - allows transport to control memory for packets

  size is rounded up to the next multiple of CTDB_TCP_ALIGNMENT before
  the buffer is allocated on mem_ctx.
 */
static void *ctdb_tcp_allocate_pkt(TALLOC_CTX *mem_ctx, size_t size)
{
	/* tcp transport needs to round to 8 byte alignment to ensure
	   that we can use a length header and 64 bit elements in
	   structures */
	const size_t mask = CTDB_TCP_ALIGNMENT - 1;
	size_t padded = (size + mask) & ~mask;

	return talloc_size(mem_ctx, padded);
}
/*
method table for the tcp transport; installed as ctdb->methods by
ctdb_tcp_init()
*/
static const struct ctdb_methods ctdb_tcp_methods = {
.initialise = ctdb_tcp_initialise,
.start = ctdb_tcp_start,
.queue_pkt = ctdb_tcp_queue_pkt,
.add_node = ctdb_tcp_add_node,
.allocate_pkt = ctdb_tcp_allocate_pkt,
.shutdown = ctdb_tcp_shutdown,
};
/*
  initialise tcp portion of ctdb

  Allocates the tcp transport state with no listening socket and
  installs it, together with the tcp method table, on the context.

  returns 0 on success; allocation failure is handled by CTDB_NO_MEMORY
 */
int ctdb_tcp_init(struct ctdb_context *ctdb)
{
	struct ctdb_tcp *ctcp = talloc_zero(ctdb, struct ctdb_tcp);

	CTDB_NO_MEMORY(ctdb, ctcp);
	ctcp->listen_fd = -1;

	ctdb->methods = &ctdb_tcp_methods;
	ctdb->private_data = ctcp;

	return 0;
}

View File

@ -1,89 +0,0 @@
/*
ctdb over TCP
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "lib/util/dlinklist.h"
#include "../tdb/include/tdb.h"
#include "system/network.h"
#include "system/filesys.h"
#include "../include/ctdb_private.h"
#include "ctdb_tcp.h"
/*
  called when a complete packet has come in

  Queue callback for an incoming connection. The packet is checked for
  minimum size, alignment, a header length matching the received length,
  the ctdb magic and the ctdb version. A packet passing every check is
  handed to the upper layer; a NULL packet or a failed check frees the
  incoming connection state.
 */
void ctdb_tcp_read_cb(uint8_t *data, size_t cnt, void *args)
{
	struct ctdb_incoming *in = talloc_get_type(args, struct ctdb_incoming);
	struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
	int valid = 0;

	if (data == NULL) {
		/* incoming socket has died */
	} else if (cnt < sizeof(*hdr)) {
		DEBUG(0,(__location__ " Bad packet length %u\n", (unsigned)cnt));
	} else if (cnt & (CTDB_TCP_ALIGNMENT-1)) {
		DEBUG(0,(__location__ " Length 0x%x not multiple of alignment\n",
			 (unsigned)cnt));
	} else if (cnt != hdr->length) {
		DEBUG(0,(__location__ " Bad header length %u expected %u\n",
			 (unsigned)hdr->length, (unsigned)cnt));
	} else if (hdr->ctdb_magic != CTDB_MAGIC) {
		DEBUG(0,(__location__ " Non CTDB packet 0x%x rejected\n",
			 hdr->ctdb_magic));
	} else if (hdr->ctdb_version != CTDB_VERSION) {
		DEBUG(0, (__location__ " Bad CTDB version 0x%x rejected\n",
			  hdr->ctdb_version));
	} else {
		valid = 1;
	}

	if (!valid) {
		talloc_free(in);
		return;
	}

	/* tell the ctdb layer above that we have a packet */
	in->ctdb->upcalls->recv_pkt(in->ctdb, data, cnt);
}
/*
  queue a packet for sending

  Passes data/length to the out queue of the node's tcp state and
  returns the queue's result.
 */
int ctdb_tcp_queue_pkt(struct ctdb_node *node, uint8_t *data, uint32_t length)
{
	struct ctdb_tcp_node *peer;

	peer = talloc_get_type(node->private_data, struct ctdb_tcp_node);
	return ctdb_queue_send(peer->out_queue, data, length);
}

View File

@ -1,24 +0,0 @@
#!/bin/sh
# Run ctdb_bench against a local test cluster.
#
# usage: <script> [NUMNODES [ctdb_bench options...]]
#
# NUMNODES defaults to 2. All script arguments (including NUMNODES) are
# forwarded unchanged to every ctdb_bench instance. Extra valgrind
# options can be supplied through the VALGRIND environment variable.

killall -q ctdb_bench ctdbd

NUMNODES=2
if [ $# -gt 0 ]; then
    NUMNODES=$1
fi

# one loopback address per node
rm -f nodes.txt
for i in `seq 1 $NUMNODES`; do
  echo 127.0.0.$i >> nodes.txt
done

tests/start_daemons.sh "$NUMNODES" nodes.txt || exit 1

killall -9 ctdb_bench
echo "Trying $NUMNODES nodes"
for i in `seq 1 $NUMNODES`; do
  # $VALGRIND is left unquoted on purpose so it can carry several
  # options; "$@" keeps each forwarded argument intact
  valgrind -q $VALGRIND bin/ctdb_bench --socket sock.$i -n "$NUMNODES" "$@" &
done

wait
bin/ctdb shutdown --socket sock.1 -n all

View File

@ -1,229 +0,0 @@
/*
simple ctdb benchmark
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "cmdline.h"
#include <sys/time.h>
#include <time.h>
/* tp1 holds the start time, tp2 the most recent sample */
static struct timeval tp1,tp2;

/* record the current time as the start of the measurement */
static void start_timer(void)
{
	gettimeofday(&tp1, NULL);
}

/* sample the current time and return the seconds elapsed since
   start_timer() was called */
static double end_timer(void)
{
	double begin, finish;

	gettimeofday(&tp2, NULL);
	finish = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
	begin = tp1.tv_sec + (tp1.tv_usec*1.0e-6);
	return finish - begin;
}
/* seconds bench_ring() keeps running; set with -t/--timelimit */
static int timelimit = 10;
/* set with -r/--num-records */
static int num_records = 10;
/* set with -n; used as the modulus when picking the next node in the ring */
static int num_nodes;
/* call ids passed to ctdb_set_call() in main() */
enum my_functions {FUNC_INCR=1, FUNC_FETCH=2};
/*
  ctdb call function to increment an integer

  An empty record is given a new 4 byte value starting at zero; an
  existing record is updated in place. In both cases the value is then
  incremented and published as the call's new data.

  returns 0 on success, CTDB_ERR_NOMEM if the new value cannot be
  allocated (new_data is then left NULL)
 */
static int incr_func(struct ctdb_call_info *call)
{
	if (call->record_data.dsize == 0) {
		call->new_data = talloc(call, TDB_DATA);
		if (call->new_data == NULL) {
			return CTDB_ERR_NOMEM;
		}
		call->new_data->dptr = talloc_size(call, 4);
		if (call->new_data->dptr == NULL) {
			/* don't hand back a half built value */
			talloc_free(call->new_data);
			call->new_data = NULL;
			return CTDB_ERR_NOMEM;
		}
		call->new_data->dsize = 4;
		*(uint32_t *)call->new_data->dptr = 0;
	} else {
		call->new_data = &call->record_data;
	}
	(*(uint32_t *)call->new_data->dptr)++;
	return 0;
}
/*
ctdb call function to fetch a record

Points the call's reply data at the record data, so the reply is the
record itself. Always returns 0.
*/
static int fetch_func(struct ctdb_call_info *call)
{
call->reply_data = &call->record_data;
return 0;
}
/* messages handled so far (passed to the handler as private_data) */
static int msg_count;
/* messages seen travelling in the +1 and in the -1 direction */
static int msg_plus, msg_minus;

/*
  handler for messages in bench_ring()

  The message payload is the direction (+1 or -1) in which the message
  travels round the ring. The counter behind private_data is bumped,
  the message is forwarded to the next node in that direction and the
  matching direction counter is incremented.
*/
static void ring_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
				 TDB_DATA data, void *private_data)
{
	int incr = *(int *)data.dptr;
	int *count = (int *)private_data;
	int dest;

	(*count)++;

	/* add num_nodes before reducing so that stepping back from
	   node 0 wraps to the last node instead of producing a negative
	   or otherwise wrong destination */
	dest = ((int)ctdb_get_vnn(ctdb) + incr + num_nodes) % num_nodes;
	ctdb_send_message(ctdb, dest, srvid, data);

	if (incr == 1) {
		msg_plus++;
	} else {
		msg_minus++;
	}
}
/*
  benchmark sending messages in a ring around the nodes

  Node 0 injects two messages, one for each direction. Every node then
  runs the event loop until timelimit seconds have passed; node 0 prints
  a progress line whenever msg_count is a multiple of 10000. A final
  rate line is printed at the end.
*/
static void bench_ring(struct ctdb_context *ctdb, struct event_context *ev)
{
	int vnn=ctdb_get_vnn(ctdb);

	if (vnn == 0) {
		/* two messages are injected into the ring, moving
		   in opposite directions */
		int dest, incr;
		TDB_DATA data;

		/* the payload is incr itself, so it follows the value
		   assigned before each send */
		data.dptr = (uint8_t *)&incr;
		data.dsize = sizeof(incr);

		/* num_nodes is added before reducing so that the -1
		   step from node 0 wraps to the last node */
		incr = 1;
		dest = (vnn + incr + num_nodes) % num_nodes;
		ctdb_send_message(ctdb, dest, 0, data);

		incr = -1;
		dest = (vnn + incr + num_nodes) % num_nodes;
		ctdb_send_message(ctdb, dest, 0, data);
	}

	start_timer();

	while (end_timer() < timelimit) {
		if (vnn == 0 && msg_count % 10000 == 0) {
			printf("Ring: %.2f msgs/sec (+ve=%d -ve=%d)\r",
			       msg_count/end_timer(), msg_plus, msg_minus);
			fflush(stdout);
		}
		event_loop_once(ev);
	}

	printf("Ring: %.2f msgs/sec (+ve=%d -ve=%d)\n",
	       msg_count/end_timer(), msg_plus, msg_minus);
}
/*
  handler for reconfigure message

  private_data points at an int flag, which is set to 1.
*/
static void reconfigure_handler(struct ctdb_context *ctdb, uint64_t srvid,
				TDB_DATA data, void *private_data)
{
	*(int *)private_data = 1;
}
/*
  main program

  Parses the command line, connects to ctdb, attaches to test.tdb,
  registers the call functions and message handlers, waits until the
  recovery mode reads 0 and then runs the ring benchmark.

  exits with status 1 on a bad option, a failed attach or a failed
  message handler registration; returns 0 otherwise
*/
int main(int argc, const char *argv[])
{
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;

	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
		{ "num-records", 'r', POPT_ARG_INT, &num_records, 0, "num_records", "integer" },
		{ NULL, 'n', POPT_ARG_INT, &num_nodes, 0, "num_nodes", "integer" },
		POPT_TABLEEND
	};
	int opt;
	const char **extra_argv;
	int extra_argc = 0;
	int ret;
	poptContext pc;
	struct event_context *ev;
	int cluster_ready=0;

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	while ((opt = poptGetNextOpt(pc)) != -1) {
		switch (opt) {
		default:
			fprintf(stderr, "Invalid option %s: %s\n",
				poptBadOption(pc, 0), poptStrerror(opt));
			exit(1);
		}
	}

	/* setup the remaining options for the main program to use */
	extra_argv = poptGetArgs(pc);
	if (extra_argv) {
		extra_argv++;
		while (extra_argv[extra_argc]) extra_argc++;
	}

	ev = s4_event_context_init(NULL);

	/* initialise ctdb */
	ctdb = ctdb_cmdline_client(ev);

	ctdb_set_message_handler(ctdb, CTDB_SRVID_RECONFIGURE, reconfigure_handler,
				 &cluster_ready);

	/* attach to a specific database */
	ctdb_db = ctdb_attach(ctdb, "test.tdb");
	if (!ctdb_db) {
		printf("ctdb_attach failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	/* setup a ctdb call function */
	ret = ctdb_set_call(ctdb_db, incr_func,  FUNC_INCR);
	ret = ctdb_set_call(ctdb_db, fetch_func, FUNC_FETCH);

	/* without the ring handler the benchmark cannot run; report the
	   failure through the exit status instead of returning success */
	if (ctdb_set_message_handler(ctdb, 0, ring_message_handler,&msg_count)) {
		printf("ctdb_set_message_handler failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	printf("Waiting for cluster\n");
	while (1) {
		uint32_t recmode=1;
		ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
		if (recmode == 0) break;
		event_loop_once(ev);
	}

	bench_ring(ctdb, ev);

	return 0;
}

View File

@ -1,262 +0,0 @@
/*
simple ctdb benchmark
Copyright (C) Andrew Tridgell 2006
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "cmdline.h"
#include <sys/time.h>
#include <time.h>
/* tp1 holds the start time, tp2 the most recent sample */
static struct timeval tp1,tp2;

/* record the current time as the start of the measurement */
static void start_timer(void)
{
	gettimeofday(&tp1, NULL);
}

/* sample the current time and return the seconds elapsed since
   start_timer() was called */
static double end_timer(void)
{
	double begin, finish;

	gettimeofday(&tp2, NULL);
	finish = tp2.tv_sec + (tp2.tv_usec*1.0e-6);
	begin = tp1.tv_sec + (tp1.tv_usec*1.0e-6);
	return finish - begin;
}
/* seconds bench_fetch() keeps running; set with -t/--timelimit */
static int timelimit = 10;
/* set with -r/--num-records */
static int num_records = 10;
/* set with -n; used as the modulus when picking the next node */
static int num_nodes;
/* incremented by message_handler() for every message received */
static int msg_count;
/* key of the single record the benchmark fetches and stores */
#define TESTKEY "testkey"
/*
fetch a record
store a expanded record
send a message to next node to tell it to do the same

The record is TESTKEY in test.tdb. Its data is treated as text: a line
naming the current msg_count and this node is appended on every call,
and the text is started afresh once the stored data exceeds 1000 bytes.
*/
static void bench_fetch_1node(struct ctdb_context *ctdb)
{
TDB_DATA key, data, nulldata;
struct ctdb_db_context *ctdb_db;
TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
int dest, ret;
struct ctdb_record_handle *h;
key.dptr = discard_const(TESTKEY);
key.dsize = strlen(TESTKEY);
ctdb_db = ctdb_db_handle(ctdb, "test.tdb");
/* fetch the record; h is the handle later used to store it */
h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
if (h == NULL) {
printf("Failed to fetch record '%s' on node %d\n",
(const char *)key.dptr, ctdb_get_vnn(ctdb));
talloc_free(tmp_ctx);
return;
}
/* start over once the record has grown past 1000 bytes */
if (data.dsize > 1000) {
data.dsize = 0;
}
if (data.dsize == 0) {
data.dptr = (uint8_t *)talloc_asprintf(tmp_ctx, "Test data\n");
}
/* NOTE(review): appending assumes data.dptr is a NUL terminated talloc
   string, and the result is not checked for NULL before strlen() -
   confirm what ctdb_fetch_lock() returns */
data.dptr = (uint8_t *)talloc_asprintf_append((char *)data.dptr,
"msg_count=%d on node %d\n",
msg_count, ctdb_get_vnn(ctdb));
/* the stored size includes the terminating NUL */
data.dsize = strlen((const char *)data.dptr)+1;
ret = ctdb_record_store(h, data);
talloc_free(h);
if (ret != 0) {
printf("Failed to store record\n");
}
talloc_free(tmp_ctx);
/* tell the next node to do the same */
nulldata.dptr = NULL;
nulldata.dsize = 0;
dest = (ctdb_get_vnn(ctdb) + 1) % num_nodes;
ctdb_send_message(ctdb, dest, 0, nulldata);
}
/*
  handler for messages in bench_fetch()

  Counts the message and runs one fetch/store/forward step on this
  node.
*/
static void message_handler(struct ctdb_context *ctdb, uint64_t srvid,
			    TDB_DATA data, void *private_data)
{
	++msg_count;
	bench_fetch_1node(ctdb);
}
/*
  benchmark the following:

  fetch a record
  store a expanded record
  send a message to next node to tell it to do the same

  The last node (vnn == num_nodes - 1) performs the first step. Every
  node then runs the event loop until timelimit seconds have passed or
  the event loop fails; node 0 prints a progress line whenever
  msg_count is a multiple of 100. A final rate line is printed at the
  end.
*/
static void bench_fetch(struct ctdb_context *ctdb, struct event_context *ev)
{
	const int self = ctdb_get_vnn(ctdb);
	const int is_reporter = (self == 0);

	if (self == num_nodes - 1) {
		bench_fetch_1node(ctdb);
	}

	start_timer();

	for (;;) {
		if (!(end_timer() < timelimit)) {
			break;
		}
		if (is_reporter && msg_count % 100 == 0) {
			printf("Fetch: %.2f msgs/sec\r", msg_count/end_timer());
			fflush(stdout);
		}
		if (event_loop_once(ev) != 0) {
			printf("Event loop failed!\n");
			break;
		}
	}

	printf("Fetch: %.2f msgs/sec\n", msg_count/end_timer());
}
/* call id passed to ctdb_set_call() and used as call.call_id in main() */
enum my_functions {FUNC_FETCH=1};
/*
ctdb call function to fetch a record

Points the call's reply data at the record data, so the reply is the
record itself. Always returns 0.
*/
static int fetch_func(struct ctdb_call_info *call)
{
call->reply_data = &call->record_data;
return 0;
}
/*
  handler for reconfigure message

  private_data points at an int flag, which is set to 1.
*/
static void reconfigure_handler(struct ctdb_context *ctdb, uint64_t srvid,
				TDB_DATA data, void *private_data)
{
	*(int *)private_data = 1;
}
/*
main program

Parses the command line, connects to ctdb, attaches to test.tdb,
registers the fetch call and the message handlers, waits until the
recovery mode reads 0, runs the fetch benchmark and finally fetches and
prints the test record.

exits with status 1 on a bad option, a failed attach or a failed final
fetch; returns 0 otherwise
*/
int main(int argc, const char *argv[])
{
struct ctdb_context *ctdb;
struct ctdb_db_context *ctdb_db;
struct poptOption popt_options[] = {
POPT_AUTOHELP
POPT_CTDB_CMDLINE
{ "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
{ "num-records", 'r', POPT_ARG_INT, &num_records, 0, "num_records", "integer" },
{ NULL, 'n', POPT_ARG_INT, &num_nodes, 0, "num_nodes", "integer" },
POPT_TABLEEND
};
int opt;
const char **extra_argv;
int extra_argc = 0;
int ret;
poptContext pc;
struct event_context *ev;
struct ctdb_call call;
int cluster_ready=0;
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
/* options with a storage pointer are filled in by popt itself; any
   value returned here other than -1 is treated as an error */
while ((opt = poptGetNextOpt(pc)) != -1) {
switch (opt) {
default:
fprintf(stderr, "Invalid option %s: %s\n",
poptBadOption(pc, 0), poptStrerror(opt));
exit(1);
}
}
/* talloc_enable_leak_report_full(); */
/* setup the remaining options for the main program to use */
extra_argv = poptGetArgs(pc);
if (extra_argv) {
extra_argv++;
while (extra_argv[extra_argc]) extra_argc++;
}
ev = s4_event_context_init(NULL);
ctdb = ctdb_cmdline_client(ev);
ctdb_set_message_handler(ctdb, CTDB_SRVID_RECONFIGURE, reconfigure_handler,
&cluster_ready);
/* attach to a specific database */
ctdb_db = ctdb_attach(ctdb, "test.tdb");
if (!ctdb_db) {
printf("ctdb_attach failed - %s\n", ctdb_errstr(ctdb));
exit(1);
}
/* NOTE(review): the result of ctdb_set_call() is stored but not checked */
ret = ctdb_set_call(ctdb_db, fetch_func, FUNC_FETCH);
ctdb_set_message_handler(ctdb, 0, message_handler, &msg_count);
printf("Waiting for cluster\n");
/* poll the recovery mode, servicing events in between, until it reads 0 */
while (1) {
uint32_t recmode=1;
ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
if (recmode == 0) break;
event_loop_once(ev);
}
bench_fetch(ctdb, ev);
ZERO_STRUCT(call);
call.key.dptr = discard_const(TESTKEY);
call.key.dsize = strlen(TESTKEY);
printf("Fetching final record\n");
/* fetch the record */
call.call_id = FUNC_FETCH;
call.call_data.dptr = NULL;
call.call_data.dsize = 0;
ret = ctdb_call(ctdb_db, &call);
if (ret == -1) {
printf("ctdb_call FUNC_FETCH failed - %s\n", ctdb_errstr(ctdb));
exit(1);
}
/* NOTE(review): printing with %s assumes the reply data is a non-NULL,
   NUL terminated string - confirm */
printf("DATA:\n%s\n", (char *)call.reply_data.dptr);
return 0;
}

View File

@ -1,156 +0,0 @@
/*
simple tool to create a lot of records on a tdb and to read them out
Copyright (C) Andrew Tridgell 2006
Ronnie sahlberg 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "lib/events/events.h"
#include "system/filesys.h"
#include "popt.h"
#include "cmdline.h"
#include <sys/time.h>
#include <time.h>
static int num_records = 10;
/*
  create num_records records in "test.tdb" and then fetch-lock all of them
  over and over again, printing a '.' and sleeping one second after every
  complete pass.

  Key and value of each record are the raw bytes of its uint32_t index.
  The read-back loop has no exit condition: the function only returns when
  the database handle cannot be found or when a fetch-lock fails.
  'ev' is not used.
*/
static void store_records(struct ctdb_context *ctdb, struct event_context *ev)
{
	TDB_DATA key, data;
	struct ctdb_db_context *ctdb_db;
	TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
	int ret;
	struct ctdb_record_handle *h;
	uint32_t i;
	/* num_records is a signed command line value; a negative count must
	   not turn into a huge unsigned loop bound */
	uint32_t count = (num_records > 0) ? (uint32_t)num_records : 0;

	ctdb_db = ctdb_db_handle(ctdb, "test.tdb");
	/* NOTE(review): assumes ctdb_db_handle() reports "not found" as NULL - confirm */
	if (ctdb_db == NULL) {
		printf("Failed to find database handle for test.tdb\n");
		talloc_free(tmp_ctx);
		return;
	}

	printf("creating %u records\n", count);
	for (i=0;i<count;i++) {
		key.dptr = (uint8_t *)&i;
		key.dsize = sizeof(uint32_t);

		h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
		if (h == NULL) {
			/* the key is a binary integer, not a string */
			printf("Failed to fetch record %u on node %d\n",
			       i, ctdb_get_vnn(ctdb));
			talloc_free(tmp_ctx);
			return;
		}

		data.dptr = (uint8_t *)&i;
		data.dsize = sizeof(uint32_t);

		ret = ctdb_record_store(h, data);
		talloc_free(h);
		if (ret != 0) {
			/* keep going: one failed store does not stop the test */
			printf("Failed to store record\n");
		}
		if (i % 1000 == 0) {
			printf("%u\r", i);
			fflush(stdout);
		}
	}

	printf("fetching all %u records\n", count);
	for (;;) {
		for (i=0;i<count;i++) {
			key.dptr = (uint8_t *)&i;
			key.dsize = sizeof(uint32_t);

			h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data);
			if (h == NULL) {
				printf("Failed to fetch record %u on node %d\n",
				       i, ctdb_get_vnn(ctdb));
				talloc_free(tmp_ctx);
				return;
			}
			talloc_free(h);
		}
		sleep(1);
		printf(".");
		fflush(stdout);
	}
	/* not reached: the loop above only ends through the return statements */
}
/*
  program entry point: parse the options, connect to ctdb as a client,
  attach to "test.tdb", wait until the recovery mode of the local node
  reads 0 and then hand over to store_records()
*/
int main(int argc, const char *argv[])
{
	struct poptOption popt_options[] = {
		POPT_AUTOHELP
		POPT_CTDB_CMDLINE
		{ "num-records", 'r', POPT_ARG_INT, &num_records, 0, "num_records", "integer" },
		POPT_TABLEEND
	};
	poptContext pc;
	struct event_context *ev;
	struct ctdb_context *ctdb;
	struct ctdb_db_context *ctdb_db;
	const char **extra_argv;
	int extra_argc = 0;
	int opt;
	uint32_t recmode;

	pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);

	/* no option is handled individually: anything other than the end
	   marker -1 is reported and ends the program */
	while ((opt = poptGetNextOpt(pc)) != -1) {
		fprintf(stderr, "Invalid option %s: %s\n",
			poptBadOption(pc, 0), poptStrerror(opt));
		exit(1);
	}

	/* talloc_enable_leak_report_full(); */

	/* count the leftover arguments, skipping the first entry (the
	   context was created with POPT_CONTEXT_KEEP_FIRST) */
	extra_argv = poptGetArgs(pc);
	if (extra_argv != NULL) {
		for (extra_argv++; extra_argv[extra_argc] != NULL; extra_argc++) {
			/* counting only */
		}
	}

	ev = s4_event_context_init(NULL);
	ctdb = ctdb_cmdline_client(ev);

	/* attach to a specific database */
	ctdb_db = ctdb_attach(ctdb, "test.tdb");
	if (ctdb_db == NULL) {
		printf("ctdb_attach failed - %s\n", ctdb_errstr(ctdb));
		exit(1);
	}

	printf("Waiting for cluster\n");
	do {
		/* reset before every query so a failed query keeps us waiting */
		recmode = 1;
		ctdb_ctrl_getrecmode(ctdb, timeval_zero(), CTDB_CURRENT_NODE, &recmode);
		if (recmode != 0) {
			event_loop_once(ev);
		}
	} while (recmode != 0);

	store_records(ctdb, ev);

	return 0;
}

View File

@ -1,50 +0,0 @@
#!/bin/sh
# Smoke test of the ctdb command line tool against two freshly started
# daemons. The first failing command ends the script with status 1.

# Run bin/ctdb with the given arguments (prefixed by $VALGRIND when that is
# set) and abort the whole script with status 1 if it fails.
ctdb_ok() {
	$VALGRIND bin/ctdb "$@" || exit 1
}

killall -q ctdbd

tests/start_daemons.sh 2 tests/nodes.txt || exit 1

echo "Testing ping"
ctdb_ok ping

echo "Testing status"
ctdb_ok status

echo "Testing statistics"
ctdb_ok -n all statistics

echo "Testing statisticsreset"
ctdb_ok -n all statisticsreset

echo "Testing debug"
ctdb_ok -n all setdebug 3
ctdb_ok -n all getdebug
ctdb_ok -n all setdebug 0
ctdb_ok -n all getdebug

echo "Attaching to some databases"
ctdb_ok attach test1.tdb
ctdb_ok attach test2.tdb

echo "Testing getdbmap"
ctdb_ok getdbmap

echo "Testing status"
ctdb_ok status

echo "Testing variables"
ctdb_ok listvars
ctdb_ok getvar TraverseTimeout
ctdb_ok setvar TraverseTimeout 10
ctdb_ok getvar TraverseTimeout

sleep 1

echo "Testing shutdown"
ctdb_ok shutdown -n all

sleep 1

echo "All done"
killall -q ctdbd
exit 0

View File

@ -1,68 +0,0 @@
#!/bin/sh
# event script for 'make test'
#
# usage: events <event> [arguments...]
#
# The first argument names the event. takeip and releaseip need three more
# arguments: interface, IP and maskbits. Exits 0 when the event was
# handled and 1 on bad usage, a failing ip command or an unknown event.

cmd="$1"
# only shift when there is something to shift: some shells abort the script
# on a failing shift, which would hide the "Invalid command" message below
if [ $# -gt 0 ]; then
	shift
fi

case "$cmd" in
	monitor)
		echo "$(date) monitor event"
		exit 0
		;;

	startup)
		echo "$(date) ctdb startup event"
		exit 0
		;;

	##################################################
	# called when ctdbd wants to take over an IP address
	takeip)
		if [ $# != 3 ]; then
			echo "must supply interface, IP and maskbits"
			exit 1
		fi
		iface=$1
		ip=$2
		maskbits=$3

		# the address is only changed when running as root
		if [ "$(id -u)" = 0 ]; then
			/sbin/ip addr add "$ip/$maskbits" dev "$iface" || {
				echo "$(/bin/date) Failed to add $ip/$maskbits on dev $iface"
				exit 1
			}
		fi
		exit 0
		;;

	##################################################
	# called when ctdbd wants to release an IP address
	releaseip)
		if [ $# != 3 ]; then
			echo "$(/bin/date) must supply interface, IP and maskbits"
			exit 1
		fi
		iface=$1
		ip=$2
		maskbits=$3

		# the address is only changed when running as root
		if [ "$(id -u)" = 0 ]; then
			/sbin/ip addr del "$ip/$maskbits" dev "$iface" || {
				echo "$(/bin/date) Failed to del $ip on dev $iface"
				exit 1
			}
		fi
		# this branch handles releaseip, so log it under that name
		echo "$(date) ctdb releaseip event for $1 $2 $3"
		exit 0
		;;

	recovered)
		echo "$(date) ctdb recovered event"
		exit 0
		;;

	shutdown)
		echo "$(date) ctdb shutdown event"
		exit 0
		;;
esac

echo "$(/bin/date) Invalid command $cmd"
exit 1

View File

@ -1,24 +0,0 @@
#!/bin/sh
# Start NUMNODES ctdb daemons on loopback addresses and run one ctdb_fetch
# client against each of them.
#
# usage: fetch.sh [numnodes [ctdb_fetch options...]]
#
# numnodes defaults to 2. Exits 1 when the daemons cannot be started or
# when any ctdb_fetch client fails, 0 otherwise.

NUMNODES=2
if [ $# -gt 0 ]; then
	NUMNODES=$1
	# drop the node count so that only the remaining options are handed
	# to ctdb_fetch below
	shift
fi

# one loopback address per node: 127.0.0.1 .. 127.0.0.$NUMNODES
rm -f nodes.txt
for i in $(seq 1 "$NUMNODES"); do
	echo "127.0.0.$i" >> nodes.txt
done

tests/start_daemons.sh "$NUMNODES" nodes.txt || exit 1

killall -9 -q ctdb_fetch

# $VALGRIND is deliberately unquoted: it may be empty or hold a command
# with options
pids=""
for i in $(seq 1 "$NUMNODES"); do
	$VALGRIND bin/ctdb_fetch --socket "sock.$i" -n "$NUMNODES" "$@" &
	pids="$pids $!"
done

# wait for every client individually; a bare 'wait' would discard their
# exit codes
status=0
for pid in $pids; do
	wait "$pid" || status=1
done

echo "Shutting down"
bin/ctdb shutdown -n all --socket=sock.1
exit $status

View File

@ -1,4 +0,0 @@
127.0.0.1
127.0.0.2
127.0.0.3
127.0.0.4

View File

@ -1,4 +0,0 @@
10.99.99.0/24
10.99.99.1/24
10.99.99.2/24
10.99.99.3/24

View File

@ -1,107 +0,0 @@
#!/bin/sh
# Manual recovery test: start four ctdb daemons, put a record into the
# test databases, kill one daemon and dump the databases again so the
# result of the recovery can be inspected. The daemons are left running.

# print two empty lines
gap() {
	echo
	echo
}

# dump every database reported by node 0, as seen by each of the nodes 0..3
dump_all_databases() {
	bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^.*name://" -e "s/ .*$//" | while read DBNAME; do
		seq 0 3 | while read NODE; do
			echo "Content of DBNAME:$DBNAME NODE:$NODE :"
			bin/ctdb_control catdb $DBNAME $NODE
		done
	done
}

# populate_node <node> <writes> <dmaster>
# run 'writerecord' <writes> times on <node> and then 'setdmaster' on the
# same node with <dmaster> as its last argument
populate_node() {
	node=$1
	writes=$2
	dmaster=$3
	while [ "$writes" -gt 0 ]; do
		./bin/ctdb_control writerecord $node 0x220c2a7b testkey1 testdata1
		writes=$((writes - 1))
	done
	./bin/ctdb_control setdmaster $node 0x220c2a7b $dmaster
}

killall -q ctdbd

echo "Starting 4 ctdb daemons"
bin/ctdbd --recovery-daemon --nlist tests/4nodes.txt
for n in 2 3 4; do
	bin/ctdbd --recovery-daemon --nlist tests/4nodes.txt --listen=127.0.0.$n --socket=/tmp/ctdb.socket.127.0.0.$n
done

echo
echo "Attaching to some databases"
for db in test1.tdb test2.tdb test3.tdb test4.tdb; do
	bin/ctdb_control attach $db || exit 1
done

echo "Clearing all databases to make sure they are all empty"
bin/ctdb_control getdbmap 0 | egrep "^dbid:" | sed -e "s/^dbid://" -e "s/ .*$//" | while read DB; do
	seq 0 3 | while read NODE; do
		bin/ctdb_control cleardb $NODE $DB
	done
done

gap
echo "Printing all databases on all nodes. they should all be empty"
echo "============================================================="
dump_all_databases

gap
echo "Populating the databases"
populate_node 0 1 1
populate_node 1 2 2
populate_node 2 3 3
populate_node 3 4 3

gap
echo "Printing all databases on all nodes. there should be a record there"
echo "============================================================="
dump_all_databases

gap
echo "killing off node #2"
echo "==================="
CTDBPID=`./bin/ctdb_control getpid 2 | sed -e "s/Pid://"`
kill $CTDBPID
sleep 1

gap
echo "wait 3 seconds to let the recovery daemon do its job"
echo "===================================================="
sleep 3

gap
echo "Printing all databases on all nodes."
echo "The databases should be the same now on all nodes"
echo "and the record will have been migrated to node 0"
echo "================================================="
# node 2 was killed above, so only the remaining nodes are dumped
for n in 0 1 3; do
	echo "Node $n:"
	bin/ctdb_control catdb test4.tdb $n
done
echo "nodemap:"
bin/ctdb_control getnodemap 0

gap
echo "Traverse the cluster and dump the database"
bin/ctdb_control catdb test4.tdb

#leave the ctdb daemons running so one can look at the box in more detail
#killall -q ctdbd

View File

@ -1,8 +0,0 @@
#!/bin/sh
# Run the test scripts one after the other; the first failure ends the run
# with status 1.

if tests/fetch.sh 4 && tests/bench.sh 4 && tests/ctdbd.sh; then
	echo "All OK"
	exit 0
fi
exit 1

View File

@ -1,28 +0,0 @@
#!/bin/sh
# Start a set of ctdbd daemons for the tests and wait until 'ctdb status'
# no longer reports RECOVERY.
#
# usage: start_daemons.sh <numnodes> <nodes-file> [ctdbd options...]
#
# Daemon number i listens on socket sock.i in the current directory;
# /tmp/ctdb.socket is made a symlink to sock.1.

if [ $# -lt 2 ]; then
	echo "Usage: $0 <numnodes> <nodes-file> [ctdbd options...]" >&2
	exit 1
fi

NUMNODES="$1"
NODES="$2"
shift 2

killall -q ctdbd

# CTDB_OPTIONS is one string holding several options; it is expanded
# unquoted below on purpose so that the shell splits it into words
CTDB_OPTIONS="--reclock=rec.lock --nlist $NODES --event-script=tests/events --logfile=- --dbdir=test.db $*"
# the public address options are only added when running as root
if [ "$(id -u)" -eq 0 ]; then
	CTDB_OPTIONS="$CTDB_OPTIONS --public-addresses=tests/public_addresses --public-interface=lo"
fi

echo "Starting $NUMNODES ctdb daemons"
for i in $(seq 1 "$NUMNODES"); do
	$VALGRIND bin/ctdbd --socket="sock.$i" $CTDB_OPTIONS || exit 1
done

# quoted so that a working directory containing spaces does not break ln
ln -sf "$PWD/sock.1" /tmp/ctdb.socket || exit 1

while bin/ctdb status | grep RECOVERY > /dev/null; do
	echo "$(date) Waiting for recovery"
	sleep 1
done

echo "$NUMNODES daemons started"

exit 0

File diff suppressed because it is too large Load Diff

View File

@ -1,43 +0,0 @@
#!/bin/sh
# onnode script for rsh
#
# usage: onnode <nodenum|all> <command>
#
# Runs <command> through rsh on the node with the given number (counted
# from 0 in the order of /etc/ctdb/nodes) or on all nodes. When <command>
# is the name of an existing local file, the remote side is asked to run
# 'at -f <file> now' instead.
# NOTE(review): this presumably expects the same file path to exist on the
# remote node - confirm.

if [ $# -lt 2 ]; then
cat <<EOF
Usage: onnode <nodenum|all> <command>
EOF
exit 1
fi

NODE="$1"
shift
# $SCRIPT is expanded unquoted in the rsh calls on purpose, so that a
# command with arguments is split into words again
SCRIPT="$*"

NODES=/etc/ctdb/nodes

# node entries are the lines that start with an alphanumeric character
NUMNODES=$(grep -E '^[[:alnum:]]' "$NODES" | wc -l)
MAXNODE=$((NUMNODES - 1))

if [ "$NODE" = "all" ]; then
	for a in $(grep -E '^[[:alnum:]]' "$NODES"); do
		if [ -f "$SCRIPT" ]; then
			rsh "$a" at -f $SCRIPT now
		else
			rsh "$a" $SCRIPT
		fi
	done
	exit 0
fi

# anything that is not a plain non-negative number cannot be a node number;
# reject it here instead of letting the numeric test below fail
case "$NODE" in
	''|*[!0-9]*)
		echo "Node $NODE doesn't exist"
		exit 1
		;;
esac

if [ "$NODE" -gt "$MAXNODE" ]; then
	echo "Node $NODE doesn't exist"
	exit 1
fi

# pick line NODE+1 of the node entries
NODEPLUSONE=$((NODE + 1))
a=$(grep -E '^[[:alnum:]]' "$NODES" | head -n "$NODEPLUSONE" | tail -n 1)

if [ -f "$SCRIPT" ]; then
	exec rsh "$a" at -f $SCRIPT now
else
	exec rsh "$a" $SCRIPT
fi

View File

@ -1,43 +0,0 @@
#!/bin/sh
# onnode script for ssh
#
# usage: onnode <nodenum|all> <command>
#
# Runs <command> through ssh on the node with the given number (counted
# from 0 in the order of /etc/ctdb/nodes) or on all nodes. When <command>
# is the name of an existing local file, the remote side is asked to run
# 'at -f <file> now' instead.
# NOTE(review): this presumably expects the same file path to exist on the
# remote node - confirm.

if [ $# -lt 2 ]; then
cat <<EOF
Usage: onnode <nodenum|all> <command>
EOF
exit 1
fi

NODE="$1"
shift
# $SCRIPT is expanded unquoted in the ssh calls on purpose, so that a
# command with arguments is split into words again
SCRIPT="$*"

NODES=/etc/ctdb/nodes

# node entries are the lines that start with an alphanumeric character
NUMNODES=$(grep -E '^[[:alnum:]]' "$NODES" | wc -l)
MAXNODE=$((NUMNODES - 1))

if [ "$NODE" = "all" ]; then
	for a in $(grep -E '^[[:alnum:]]' "$NODES"); do
		if [ -f "$SCRIPT" ]; then
			ssh "$a" at -f $SCRIPT now
		else
			ssh "$a" $SCRIPT
		fi
	done
	exit 0
fi

# anything that is not a plain non-negative number cannot be a node number;
# reject it here instead of letting the numeric test below fail
case "$NODE" in
	''|*[!0-9]*)
		echo "Node $NODE doesn't exist"
		exit 1
		;;
esac

if [ "$NODE" -gt "$MAXNODE" ]; then
	echo "Node $NODE doesn't exist"
	exit 1
fi

# pick line NODE+1 of the node entries
NODEPLUSONE=$((NODE + 1))
a=$(grep -E '^[[:alnum:]]' "$NODES" | head -n "$NODEPLUSONE" | tail -n 1)

if [ -f "$SCRIPT" ]; then
	exec ssh "$a" at -f $SCRIPT now
else
	exec ssh "$a" $SCRIPT
fi

View File

@ -2,7 +2,6 @@
# Start LIBRARY ntvfs_common
[SUBSYSTEM::ntvfs_common]
PUBLIC_DEPENDENCIES = NDR_OPENDB NDR_NOTIFY sys_notify sys_lease share
PRIVATE_DEPENDENCIES = brlock_ctdb opendb_ctdb
# End LIBRARY ntvfs_common
################################################

View File

@ -62,11 +62,7 @@ struct odb_context *odb_init(TALLOC_CTX *mem_ctx,
struct ntvfs_context *ntvfs_ctx)
{
if (ops == NULL) {
if (lp_parm_bool(ntvfs_ctx->lp_ctx, NULL, "ctdb", "opendb", false)) {
odb_ctdb_init_ops();
} else {
odb_tdb_init_ops();
}
odb_tdb_init_ops();
}
return ops->odb_init(mem_ctx, ntvfs_ctx);
}

View File

@ -37,7 +37,6 @@
#include "smbd/service.h"
#include "param/secrets.h"
#include "smbd/pidfile.h"
#include "cluster/ctdb/ctdb_cluster.h"
#include "param/param.h"
/*
@ -322,9 +321,6 @@ static int binary_smbd_main(const char *binary_name, int argc, const char *argv[
return 1;
}
/* initialise clustering if needed */
cluster_ctdb_init(cmdline_lp_ctx, event_ctx, model);
if (opt_interactive) {
/* terminate when stdin goes away */
stdin_event_flags = EVENT_FD_READ;