From 07ade5e488475ef7e56e4d924776fa6fc2688458 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 16 Apr 2007 16:01:37 +1000 Subject: [PATCH] this is a demonstration of an idea for handling locks in ctdb. The problem we have is this: - we want the client smbd processes to be able to 'shortcut' access to the ltdb, by directly accessing the ltdb, and if the header of the record shows we are the dmaster then process immediately, with no overhead of talking across the unix domain socket - a client doing a shortcut will use tdb_chainlock() to lock the record while processing - we want the main ctdb daemon to be able to set locks on the record, and when those locks collide with a 'shortcut' fcntl lock, we want the ctdb daemon to keep processing other operations - we don't want to have to send a message from a smbd client to the ctdbd each time it releases a lock The solution is shown in this example. Note that the expensive fork() and blocking lock is only paid in case of contention, so in the median case I think this is zero cost. 
(This used to be ctdb commit a3248c3e2b740cd2403acffd3c1f6a33dca0ea03) --- ctdb/Makefile.in | 6 +- ctdb/common/util.c | 7 ++ ctdb/config.mk | 1 + ctdb/include/includes.h | 2 +- ctdb/tests/lockwait.c | 244 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 ctdb/tests/lockwait.c diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in index 175418aa99d..93e0c7d2bdc 100644 --- a/ctdb/Makefile.in +++ b/ctdb/Makefile.in @@ -30,7 +30,7 @@ CTDB_OBJ = $(CTDB_COMMON_OBJ) $(CTDB_TCP_OBJ) OBJS = @TDBOBJ@ @TALLOCOBJ@ @LIBREPLACEOBJ@ @INFINIBAND_WRAPPER_OBJ@ $(EXTRA_OBJ) $(EVENTS_OBJ) $(CTDB_OBJ) -BINS = bin/ctdbd bin/ctdbd_test bin/ctdb_test bin/ctdb_bench bin/ctdb_messaging bin/ctdb_fetch bin/ctdb_fetch1 @INFINIBAND_BINS@ +BINS = bin/ctdbd bin/ctdbd_test bin/ctdb_test bin/ctdb_bench bin/ctdb_messaging bin/ctdb_fetch bin/ctdb_fetch1 bin/lockwait @INFINIBAND_BINS@ DIRS = lib bin @@ -81,6 +81,10 @@ bin/ibwrapper_test: $(OBJS) ib/ibwrapper_test.o @echo Linking $@ @$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(OBJS) $(LIB_FLAGS) +bin/lockwait: $(OBJS) tests/lockwait.o tests/cmdline.o + @echo Linking $@ + @$(CC) $(CFLAGS) -o $@ tests/lockwait.o tests/cmdline.o $(OBJS) $(LIB_FLAGS) + clean: rm -f *.o */*.o */*/*.o rm -f $(BINS) diff --git a/ctdb/common/util.c b/ctdb/common/util.c index a44c7d0ad07..4d0b25117a7 100644 --- a/ctdb/common/util.c +++ b/ctdb/common/util.c @@ -37,6 +37,13 @@ struct timeval timeval_current(void) return tv; } +double timeval_elapsed(struct timeval *tv) +{ + struct timeval tv2 = timeval_current(); + return (tv2.tv_sec - tv->tv_sec) + + (tv2.tv_usec - tv->tv_usec)*1.0e-6; +} + /** return a timeval struct with the given elements */ diff --git a/ctdb/config.mk b/ctdb/config.mk index 0e0629bfb1f..a16b7b29919 100644 --- a/ctdb/config.mk +++ b/ctdb/config.mk @@ -15,6 +15,7 @@ OBJ_FILES = \ ################## [SUBSYSTEM::ctdb] +INIT_FUNCTION = server_service_ctdbd_init OBJ_FILES = \ ctdb_cluster.o \ common/ctdb.o \ 
diff --git a/ctdb/include/includes.h b/ctdb/include/includes.h index 994c25452c8..e55ddc2c6b8 100644 --- a/ctdb/include/includes.h +++ b/ctdb/include/includes.h @@ -32,5 +32,5 @@ int timeval_compare(const struct timeval *tv1, const struct timeval *tv2); struct timeval timeval_until(const struct timeval *tv1, const struct timeval *tv2); _PUBLIC_ struct timeval timeval_current_ofs(uint32_t secs, uint32_t usecs); +double timeval_elapsed(struct timeval *tv); char **file_lines_load(const char *fname, int *numlines, TALLOC_CTX *mem_ctx); - diff --git a/ctdb/tests/lockwait.c b/ctdb/tests/lockwait.c new file mode 100644 index 00000000000..2c95bb7334e --- /dev/null +++ b/ctdb/tests/lockwait.c @@ -0,0 +1,244 @@ +/* + test a lock wait idea + + Copyright (C) Andrew Tridgell 2006 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+
+#include "includes.h"
+#include "lib/events/events.h"
+#include "system/filesys.h"
+#include "system/wait.h"
+#include "popt.h"
+#include "tests/cmdline.h"
+
+
+/*
+  state for one pending asynchronous lock wait
+
+  fde          - fd event watching the read end of the pipe
+  fd           - pipe shared with the helper child; the parent keeps fd[0]
+  child        - pid of the helper process blocked in fcntl(F_SETLKW)
+  private_data - opaque pointer handed back to callback
+  callback     - function to call once the helper has finished waiting
+*/
+struct lockwait_handle {
+	struct fd_event *fde;
+	int fd[2];
+	pid_t child;
+	void *private_data;
+	void (*callback)(void *);
+};
+
+/*
+  event handler for the read end of the helper's pipe
+
+  The helper never writes to the pipe, so the fd becomes readable only
+  when the helper exits and its copy of the write end is closed.
+
+  Everything that is needed after the handle has been freed (callback,
+  its argument, the child pid and the pipe fd) is copied into locals
+  first, so the handle is never touched after talloc_free().
+*/
+static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
+			     uint16_t flags, void *private_data)
+{
+	struct lockwait_handle *h = talloc_get_type(private_data,
+						     struct lockwait_handle);
+	void (*callback)(void *) = h->callback;
+	void *p = h->private_data;
+	pid_t child = h->child;
+	int pipe_fd = h->fd[0];
+
+	/* the destructor would SIGKILL the child; it has finished on its
+	   own, so disarm the destructor and clean up by hand instead */
+	talloc_set_destructor(h, NULL);
+	talloc_free(h);
+	/* with the destructor disarmed nobody else closes the pipe */
+	close(pipe_fd);
+	callback(p);
+	/* reap the helper so that it does not stay around as a zombie */
+	waitpid(child, NULL, 0);
+}
+
+/*
+  talloc destructor for a handle that is freed while the wait is still
+  pending: closes the pipe, kills the helper child and reaps it
+*/
+static int lockwait_destructor(struct lockwait_handle *h)
+{
+	close(h->fd[0]);
+	kill(h->child, SIGKILL);
+	waitpid(h->child, NULL, 0);
+	return 0;
+}
+
+
+/*
+  wait asynchronously for a byte range of fd to become lockable
+
+  A helper process is forked which blocks in fcntl(F_SETLKW) asking for
+  a write lock on len bytes starting at ofs, and exits as soon as the
+  call returns.  Its exit closes the write end of a pipe, which makes
+  the read end readable in the parent; lockwait_handler then calls
+  callback(private_data) from the event loop of ev.
+
+  The callback is only a hint that the range looked free: fcntl locks
+  belong to the process that took them (POSIX), so the caller still has
+  to take the lock itself afterwards.
+
+  Returns a handle allocated on mem_ctx, or NULL if the allocation,
+  pipe(), fork() or event registration failed.  Freeing the handle
+  before the callback has run kills the helper.
+*/
+static struct lockwait_handle *lockwait(struct event_context *ev,
+					 TALLOC_CTX *mem_ctx,
+					 int fd, off_t ofs, size_t len,
+					 void (*callback)(void *), void *private_data)
+{
+	struct lockwait_handle *h;
+	int ret;
+
+	h = talloc_zero(mem_ctx, struct lockwait_handle);
+	if (h == NULL) {
+		return NULL;
+	}
+
+	ret = pipe(h->fd);
+	if (ret != 0) {
+		talloc_free(h);
+		return NULL;
+	}
+
+	h->child = fork();
+	if (h->child == (pid_t)-1) {
+		close(h->fd[0]);
+		close(h->fd[1]);
+		talloc_free(h);
+		return NULL;
+	}
+
+	h->callback = callback;
+	h->private_data = private_data;
+
+	if (h->child == 0) {
+		/* in child: keep only the write end of the pipe, block until
+		   the range can be locked, then exit to signal the parent */
+		struct flock lock;
+		close(h->fd[0]);
+		lock.l_type = F_WRLCK;
+		lock.l_whence = SEEK_SET;
+		lock.l_start = ofs;
+		lock.l_len = len;
+		lock.l_pid = 0;
+		fcntl(fd,F_SETLKW,&lock);
+		_exit(0);
+	}
+
+	/* in parent: the write end must be closed here, otherwise the
+	   read end would never see EOF when the child exits */
+	close(h->fd[1]);
+	talloc_set_destructor(h, lockwait_destructor);
+
+	/* watch the pipe, not the data file: a regular file always polls
+	   as readable and would fire the handler immediately */
+	h->fde = event_add_fd(ev, h, h->fd[0], EVENT_FD_READ, lockwait_handler, h);
+	if (h->fde == NULL) {
+		/* the destructor closes the pipe and kills the child */
+		talloc_free(h);
+		return NULL;
+	}
+
+	return h;
+}
+
+
+/*
+  lockwait callback used by fcntl_lock: p points at an int flag which
+  is set to 1 to tell the waiting loop that it should retry the lock
+*/
+static void fcntl_lock_callback(void *p)
+{
+	int *got_lock = (int *)p;
+	*got_lock = 1;
+}
+
+/*
+  get an fcntl lock - waiting if necessary
+
+  For op == F_SETLKW the lock is attempted with the non-blocking
+  F_SETLK.  If that fails because of a conflicting lock, a lockwait()
+  helper is started and events on ev are processed until the helper
+  reports that the range looked free; the attempt is then repeated.
+  Any other op is passed to fcntl() once and never waits.
+
+  Returns 0 on success.  Returns -1 on failure with errno as set by
+  fcntl(), or ENOLCK if the lockwait helper could not be started.
+*/
+static int fcntl_lock(struct event_context *ev,
+		      int fd, int op, off_t offset, off_t count, int type)
+{
+	struct flock lock;
+	int use_lockwait = (op == F_SETLKW);
+	int got_lock = 0;
+
+	lock.l_type = type;
+	lock.l_whence = SEEK_SET;
+	lock.l_start = offset;
+	lock.l_len = count;
+	lock.l_pid = 0;
+
+	for (;;) {
+		struct lockwait_handle *h;
+
+		if (fcntl(fd, use_lockwait ? F_SETLK : op, &lock) == 0) {
+			return 0;
+		}
+		if (!use_lockwait ||
+		    (errno != EACCES && errno != EAGAIN && errno != EDEADLK)) {
+			/* either the caller did not ask to wait, or this is
+			   not a lock conflict: retrying would spin forever */
+			return -1;
+		}
+
+		got_lock = 0;
+		h = lockwait(ev, ev, fd, offset, count,
+			     fcntl_lock_callback, &got_lock);
+		if (h == NULL) {
+			errno = ENOLCK;
+			return -1;
+		}
+		/* in real code we would return to the event loop */
+		while (!got_lock) {
+			event_loop_once(ev);
+		}
+		/* the handle was freed by lockwait_handler; go round again
+		   and try to take the lock ourselves */
+	}
+}
+
+/* how long each child runs, in seconds; set by -t/--timelimit in main */
+static int timelimit = 10;
+
+/*
+  body of one test process: repeatedly take and drop a write lock on
+  the first byte of test.dat until timelimit seconds have passed, then
+  print the number of lock/unlock cycles per second and exit
+
+  n is only used to label the output.  This function does not return.
+*/
+static void child(struct event_context *ev, int n)
+{
+	int fd;
+	int count=0;
+	struct timeval tv;
+	fd = open("test.dat", O_CREAT|O_RDWR, 0666);
+	if (fd == -1) {
+		perror("test.dat");
+		exit(1);
+	}
+
+	tv = timeval_current();
+
+	while (timeval_elapsed(&tv) < timelimit) {
+		int ret;
+		ret = fcntl_lock(ev, fd, F_SETLKW, 0, 1, F_WRLCK);
+		if (ret != 0) {
+			printf("Failed to get lock in child %d!\n", n);
+			break;
+		}
+		fcntl_lock(ev, fd, F_SETLK, 0, 1, F_UNLCK);
+		count++;
+	}
+
+	printf("child %2d %.0f ops/sec\n", n, count/timeval_elapsed(&tv));
+	/* _exit() does not flush stdio, so without this the result is lost
+	   whenever stdout is a pipe or a file */
+	fflush(stdout);
+	_exit(0);
+}
+
+/*
+  main program
+*/
+int main(int argc, const char *argv[])
+{
+	pid_t *pids;
+	int nprogs = 2;
+	int i;
+	struct event_context *ev;
+	struct poptOption popt_options[] = {
+		POPT_AUTOHELP
+		{ "timelimit", 't', POPT_ARG_INT, &timelimit, 0, "timelimit", "integer" },
+		{ "num-progs", 'n', POPT_ARG_INT, &nprogs, 0, "num_progs",
"integer" }, + POPT_TABLEEND + }; + poptContext pc; + int opt; + + pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST); + + while ((opt = poptGetNextOpt(pc)) != -1) { + switch (opt) { + default: + fprintf(stderr, "Invalid option %s: %s\n", + poptBadOption(pc, 0), poptStrerror(opt)); + exit(1); + } + } + + ev = event_context_init(NULL); + + pids = talloc_array(ev, pid_t, nprogs); + + /* create N processes fighting over the same lock */ + for (i=0;i