From cc03304048f341d25b5b861c2e9eca4538a9d466 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 4 Jul 2008 18:03:24 +1000 Subject: [PATCH 01/90] fixed a case statement (This used to be ctdb commit 5168e9fa138995581fe5805f99ae569ce3c127f7) --- ctdb/server/ctdb_server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 017fb9a6dce..19c8c2c8d5b 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -511,7 +511,7 @@ void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode) { switch (opcode) { -#define DO_OP(x) ctdb_queue_packet(ctdb, hdr); break +#define DO_OP(x) case x: ctdb_queue_packet(ctdb, hdr); break DO_OP(1); DO_OP(2); DO_OP(3); From 66222af5e46ccef9bb900cfb25b064b1b29d5bfe Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 11 Jul 2008 10:33:46 +1000 Subject: [PATCH 02/90] Fix a very subtle race where we could get a double free of talloced memory if ctdb_run_eventscript() would be called during processing of ctdb_event_script_timeout() for user invoked eventscripts.
(eventscripts invoked by "ctdb eventscript ...") Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 15bc66ae801b0c69a65a7a2acf5df151e76edc2a) --- ctdb/server/eventscript.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 1d42b2be493..316812290d2 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -252,10 +252,15 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve void (*callback)(struct ctdb_context *, int, void *) = state->callback; void *private_data = state->private_data; struct ctdb_context *ctdb = state->ctdb; + char *options; DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts)); - if (!strcmp(state->options, "monitor")) { + options = talloc_strdup(ctdb, state->options); + CTDB_NO_MEMORY_VOID(ctdb, options); + + talloc_free(state); + if (!strcmp(options, "monitor")) { /* if it is a monitor event, we allow it to "hang" a few times before we declare it a failure and ban ourself (and make ourself unhealthy) @@ -271,7 +276,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve } else { callback(ctdb, 0, private_data); } - } else if (!strcmp(state->options, "startup")) { + } else if (!strcmp(options, "startup")) { DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n")); callback(ctdb, -1, private_data); } else { @@ -281,7 +286,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve callback(ctdb, -1, private_data); } - talloc_free(state); + talloc_free(options); } /* @@ -480,7 +485,7 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state); CTDB_NO_MEMORY(ctdb, state); - state->c = talloc_steal(ctdb, c); + state->c = talloc_steal(state, c); DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with 
arguments %s\n", indata.dptr)); From d16bc2f6551b6621289aed182f9e75c5a1123542 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 11 Jul 2008 11:48:41 +1000 Subject: [PATCH 03/90] new version 1.0.47 Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 1de62d1ad71fa784d5e93f76da8f872cad9b9f42) --- ctdb/packaging/RPM/ctdb.spec | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 46bedd0528d..dfe8c39df86 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 46 +Release: 47 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,7 +118,10 @@ fi %{_includedir}/ctdb_private.h %changelog -* Fri Jul 11 2008 : Version pre_1.0.47 +* Fri Jul 11 2008 : Version 1.0.47 + - Fix a double free bug where if a user striggered (ctdb eventscript) + hung and while the timeout handler was being processed a new user + triggered eventscript was started we would free state twice. - Rewrite of onnode and associated documentation. * Thu Jul 10 2008 : Version 1.0.46 - Document both the LVS:cingle-ip-address and the REMOTE-NODE:wan-accelerator From 3fa8aaa4e2f4931ac968bff2d85b98908c323d24 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 14 Jul 2008 09:19:22 +1000 Subject: [PATCH 04/90] fixed up exit status for onnode (This used to be ctdb commit c26afe26cc5c1f9cd9eef74166b5fc39dde591d3) --- ctdb/tools/onnode | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ctdb/tools/onnode b/ctdb/tools/onnode index 1e7e24690fa..6d8ed17f042 100755 --- a/ctdb/tools/onnode +++ b/ctdb/tools/onnode @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Run commands on CTDB nodes. 
@@ -171,9 +171,12 @@ trap 'kill -TERM $pids 2>/dev/null' INT TERM # There's a small race here where the kill can fail if no processes # have been added to $pids and the script is interrupted. However, # the part of the window where it matter is very small. +retcode=0 for n in $nodes ; do if $parallel ; then if $verbose ; then + # pipefail is a bashism - is there some way to do this with plain sh? + set -o pipefail 2>/dev/null ($SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" 2>&1 | sed -e "s@^@[$n] @" )& else $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" & @@ -184,8 +187,17 @@ for n in $nodes ; do echo >&2 ; echo ">> NODE: $n <<" >&2 fi - $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" + $SSH $ssh_opts $EXTRA_SSH_OPTS $n "$command" + [ $? = 0 ] || retcode=$? fi done -$parallel && wait +$parallel && { + for p in $pids; do + wait $p + [ $? = 0 ] || retcode=$? + done +} + +exit $retcode + From 0934f40c2af94db6591193b5c70546d0c4156a30 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 14 Jul 2008 11:22:41 +1000 Subject: [PATCH 05/90] remove a debugging echo statement (This used to be ctdb commit 495a6293c284a1e74b9c5e0c112e6ed5feead107) --- ctdb/config/events.d/50.samba | 1 - 1 file changed, 1 deletion(-) diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index 04ca6e78fe4..a8aad9152f6 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -114,7 +114,6 @@ case $cmd in } [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ] && { -echo do da test smb_dirs=`testparm -s 2> /dev/null | egrep '^[[:space:]]*path = ' | cut -d= -f2` ctdb_check_directories "Samba" $smb_dirs } From 9a9b506d23c0bc8d5fe592c638f9ad02485f4aef Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 15 Jul 2008 11:03:35 +1000 Subject: [PATCH 06/90] Add two new options CTDB_SAMBA_SKIP_CONF_CHECK and CTDB_SAMBA_CHECK_PORTS. The first is used to tell ctdb to no longer monitoring if the smb.conf file is consistent or not. 
The second specifies which ports to check that smb is listening on instead of using testparm to figure this out. Since the net, testparm and smbstatus may block indefinitely in some configurations we must have a way to configure ctdb to NOT use any of these three commands in the scripts. These commands should thus never be used in scripts. Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 2fe52c7979ecd28250ec4ac195d3c3999916e573) --- ctdb/config/ctdb.sysconfig | 7 +++++++ ctdb/config/events.d/50.samba | 13 +++++++++---- ctdb/packaging/RPM/ctdb.spec | 7 +++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig index 9084f8eb8d3..84a90d0609a 100644 --- a/ctdb/config/ctdb.sysconfig +++ b/ctdb/config/ctdb.sysconfig @@ -47,6 +47,13 @@ # since checking each one of them might take a long time. # CTDB_SAMBA_SKIP_SHARE_CHECK="yes" +# should we skip checking smb.conf for consistency +# CTDB_SAMBA_SKIP_CONF_CHECK="yes" + +# specify which ports we should check that there is a daemon listening to +# by default we use testparm and look in smb.conf to figure out. +# CTDB_SAMBA_CHECK_PORTS="445" + # should ctdb manage starting/stopping Winbind service? 
# if left comented out then it will be autodetected based on smb.conf # CTDB_MANAGES_WINBIND=yes diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index a8aad9152f6..9aa21e2ece0 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -108,9 +108,11 @@ case $cmd in touch $CTDB_BASE/state/samba/periodic_cleanup } - testparm -s 2>&1 | egrep '^WARNING|^ERROR|^Unknown' && { - echo "ERROR: testparm shows smb.conf is not clean" - exit 1 + [ "$CTDB_SAMBA_SKIP_CONF_CHECK" != "yes" ] && { + testparm -s 2>&1 | egrep '^WARNING|^ERROR|^Unknown' && { + echo "ERROR: testparm shows smb.conf is not clean" + exit 1 + } } [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ] && { @@ -118,7 +120,10 @@ case $cmd in ctdb_check_directories "Samba" $smb_dirs } - smb_ports=`testparm -s --parameter-name="smb ports" 2> /dev/null` + smb_ports="$CTDB_SAMBA_CHECK_PORTS" + [ -z "$smb_ports" ] && { + smb_ports=`testparm -s --parameter-name="smb ports" 2> /dev/null` + } ctdb_check_tcp_ports "Samba" $smb_ports # check winbind is OK diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index dfe8c39df86..7182f7de128 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -118,6 +118,13 @@ fi %{_includedir}/ctdb_private.h %changelog +* Fri Jul 11 2008 : Version 1.0.48-pre + - Add two new options : + CTDB_SAMBA_SKIP_CONF_CHECK and CTDB_SAMBA_CHECK_PORTS that can be used + to override what checks to do when monitoring samba health. + We can no longer use the smbstatus, net or testparm commands to check + if samba or its config is healthy since these commands may block + indefinitely and thus can not be used in scripts. 
* Fri Jul 11 2008 : Version 1.0.47 - Fix a double free bug where if a user striggered (ctdb eventscript) hung and while the timeout handler was being processed a new user From d7da19a6081a9308808a37938b99b1f97a53de77 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 16 Jul 2008 12:23:05 +1000 Subject: [PATCH 07/90] fixed postun script to prevent corrupting RPM database (This used to be ctdb commit 9043913a54fe707083697f0587c6ffde86ca5a69) --- ctdb/packaging/RPM/ctdb.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 7182f7de128..07ae7218b52 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -78,7 +78,7 @@ exit 0 %postun if [ "$1" -ge "1" ]; then - %{initdir}/ctdb restart >/dev/null 2>&1 + %{initdir}/ctdb restart >/dev/null 2>&1 || true fi From e4e298e10e3d6e219638740d435b3e1f296b4d74 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 16 Jul 2008 12:23:18 +1000 Subject: [PATCH 08/90] change how we filter out "empty" records in the traversecode so that we output the same list of keys in "catdb" as "tdbdump". when traversing a persistent database, as an optimization, only traverse on the local node (and thus skip checking if we are dmaster or not). If the local node is not part of the vnnmap and thus would not be guaranteed to have an uptodate persistent database we instead traverse it on one of the other nodes that are in the vnnmap. 
(This used to be ctdb commit 2b0bd6c302545f2533a7a67dfc6bb5f9f60799f7) --- ctdb/client/ctdb_client.c | 5 +-- ctdb/server/ctdb_traverse.c | 71 +++++++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index ed999f2600b..544f5d1c383 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -1847,6 +1847,7 @@ int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void * return state.count; } +#define ISASCII(x) ((x>31)&&(x<128)) /* called on each key during a catdb */ @@ -1861,7 +1862,7 @@ static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, voi fprintf(f, "key(%u) = \"", (unsigned)key.dsize); for (i=0;idmaster != h->ctdb_db->ctdb->pnn) { - return 0; + + if (h->ctdb_db->persistent == 0) { + /* filter out zero-length records */ + if (data.dsize <= sizeof(struct ctdb_ltdb_header)) { + return 0; + } + + /* filter out non-authoritative records */ + if (hdr->dmaster != h->ctdb_db->ctdb->pnn) { + return 0; + } } d = ctdb_marshall_record(h, 0, key, NULL, data); @@ -174,6 +182,7 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con struct ctdb_traverse_all_handle { struct ctdb_context *ctdb; + struct ctdb_db_context *ctdb_db; uint32_t reqid; ctdb_traverse_fn_t callback; void *private_data; @@ -224,17 +233,19 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_ int ret; TDB_DATA data; struct ctdb_traverse_all r; + uint32_t destination; state = talloc(ctdb_db, struct ctdb_traverse_all_handle); if (state == NULL) { return NULL; } - state->ctdb = ctdb; - state->reqid = ctdb_reqid_new(ctdb_db->ctdb, state); - state->callback = callback; + state->ctdb = ctdb; + state->ctdb_db = ctdb_db; + state->reqid = ctdb_reqid_new(ctdb_db->ctdb, state); + state->callback = callback; state->private_data = private_data; - state->null_count = 0; + state->null_count = 0; 
talloc_set_destructor(state, ctdb_traverse_all_destructor); @@ -245,10 +256,37 @@ static struct ctdb_traverse_all_handle *ctdb_daemon_traverse_all(struct ctdb_db_ data.dptr = (uint8_t *)&r; data.dsize = sizeof(r); - /* tell all the nodes in the cluster to start sending records to this node */ - ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_VNNMAP, 0, - CTDB_CONTROL_TRAVERSE_ALL, - 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + if (ctdb_db->persistent == 0) { + /* normal database, traverse all nodes */ + destination = CTDB_BROADCAST_VNNMAP; + } else { + int i; + /* persistent database, traverse one node, preferably + * the local one + */ + destination = ctdb->pnn; + /* check we are in the vnnmap */ + for (i=0; i < ctdb->vnn_map->size; i++) { + if (ctdb->vnn_map->map[i] == ctdb->pnn) { + break; + } + } + /* if we are not in the vnn map we just pick the first + * node instead + */ + if (i == ctdb->vnn_map->size) { + destination = ctdb->vnn_map->map[0]; + } + } + + /* tell all the nodes in the cluster to start sending records to this + * node, or if it is a persistent database, just tell the local + * node + */ + ret = ctdb_daemon_send_control(ctdb, destination, 0, + CTDB_CONTROL_TRAVERSE_ALL, + 0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL); + if (ret != 0) { talloc_free(state); return NULL; @@ -371,8 +409,13 @@ int32_t ctdb_control_traverse_data(struct ctdb_context *ctdb, TDB_DATA data, TDB if (key.dsize == 0 && data.dsize == 0) { state->null_count++; - if (state->null_count != ctdb_get_num_active_nodes(ctdb)) { - return 0; + /* Persistent databases are only scanned on one node (the local + * node) + */ + if (state->ctdb_db->persistent == 0) { + if (state->null_count != ctdb_get_num_active_nodes(ctdb)) { + return 0; + } } } From 0964c59dc6706311fd565d91289e922be5e3a864 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 17 Jul 2008 09:04:15 +1000 Subject: [PATCH 09/90] Do not allow "ctdb eventscript" to start new eventscripts while we are in recovery mode 
(This used to be ctdb commit 8140825e1d06053a900fd0adf0a150622c0fc146) --- ctdb/server/eventscript.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index 316812290d2..057bc8077cb 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -489,6 +489,11 @@ int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb, DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr)); + if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) { + DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr)); + return -1; + } + ctdb_disable_monitoring(ctdb); ret = ctdb_event_script_callback(ctdb, From 6eb4e46fe1f2b14178bbd21930f7bca486a1ed2e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 17 Jul 2008 13:50:55 +1000 Subject: [PATCH 10/90] Add two new controls to start and cancel a persistent update. This allows ctdb to automatically start a new full blown recovery if a client has started updating the local tdb for a persistent database but is kill -9ed before it has ensured the update is distributed clusterwide. 
(This used to be ctdb commit 1ffccb3e0b3b5bd376c5302304029af393709518) --- ctdb/include/ctdb_private.h | 9 +++++++ ctdb/server/ctdb_control.c | 6 +++++ ctdb/server/ctdb_daemon.c | 6 +++++ ctdb/server/ctdb_persistent.c | 48 +++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 4124f649fc3..66e77093126 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -161,6 +161,7 @@ struct ctdb_client { uint32_t client_id; pid_t pid; struct ctdb_tcp_list *tcp_list; + uint32_t num_persistent_updates; }; @@ -544,6 +545,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_DEL_PUBLIC_IP = 78, CTDB_CONTROL_RUN_EVENTSCRIPTS = 79, CTDB_CONTROL_GET_CAPABILITIES = 80, + CTDB_CONTROL_START_PERSISTENT_UPDATE = 81, + CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE= 82, }; /* @@ -812,6 +815,12 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data); int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data); +int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata); +int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata); void ctdb_queue_packet(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); void ctdb_queue_packet_opcode(struct ctdb_context *ctdb, struct ctdb_req_header *hdr, unsigned opcode); int ctdb_ltdb_lock_requeue(struct ctdb_db_context *ctdb_db, diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index aaadbaee0a1..1ae8e85cacf 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -394,6 +394,12 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_GET_CAPABILITIES: return ctdb_control_get_capabilities(ctdb, outdata); + case 
CTDB_CONTROL_START_PERSISTENT_UPDATE: + return ctdb_control_start_persistent_update(ctdb, c, indata); + + case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: + return ctdb_control_cancel_persistent_update(ctdb, c, indata); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index c153a975708..aeb0cbde38b 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -210,6 +210,12 @@ static int ctdb_client_destructor(struct ctdb_client *client) ctdb_takeover_client_destructor_hook(client); ctdb_reqid_remove(client->ctdb, client->client_id); client->ctdb->statistics.num_clients--; + + if (client->num_persistent_updates != 0) { + DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates)); + client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; + } + return 0; } diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 713950a0c9d..66311a9f890 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -81,9 +81,16 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA recdata, bool *async_reply) { + struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); struct ctdb_persistent_state *state; int i; + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store to a client. Returning error\n")); + return -1; + } + client->num_persistent_updates--; + state = talloc_zero(ctdb, struct ctdb_persistent_state); CTDB_NO_MEMORY(ctdb, state); @@ -410,3 +417,44 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb, return 0; } + + + +/* + start a persistent store operation. passing both the key, header and + data to the daemon. 
If the client disconnects before it has issued + a persistent_update call to the daemon we trigger a full recovery + to ensure the databases are brought back in sync. + for now we ignore the recdata that the client has passed to us. + */ +int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata) +{ + struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); + + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " can not match start_persistent_update to a client. Returning error\n")); + return -1; + } + + client->num_persistent_updates++; + + return 0; +} + +int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata) +{ + struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); + + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " can not match cancel_persistent_update to a client. Returning error\n")); + return -1; + } + + client->num_persistent_updates--; + + return 0; +} From b9ba443da36d3d3832e7e08e3f3d0d457889cf54 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 17 Jul 2008 13:56:17 +1000 Subject: [PATCH 11/90] new version 1.0.48 (This used to be ctdb commit ccf9334bd20b1398623dd649987aa15119dac14e) --- ctdb/packaging/RPM/ctdb.spec | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 07ae7218b52..84f9cf6c288 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 47 +Release: 48 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,7 +118,16 @@ fi %{_includedir}/ctdb_private.h %changelog -* Fri Jul 11 2008 : Version 1.0.48-pre +* Thu Jul 17 2008 : Version 1.0.48 + - Update the spec file. 
+ - Do not start new user-triggered eventscripts if we are already + inside recovery mode. + - Add two new controls to start/cancel a persistent update. + A client such as samba can use these to tell ctdbd that it will soon + be writing directly to the persistent database tdb file. So if + samba is -9ed before it has eitehr done the persistent_store or + canceled the operation, ctdb knows that the persistent databases + 'may' be out of sync and therefore a full blown recovery is called for. - Add two new options : CTDB_SAMBA_SKIP_CONF_CHECK and CTDB_SAMBA_CHECK_PORTS that can be used to override what checks to do when monitoring samba health. From 90ff67dc7403e714424b4c356ef6ee78dfffbb47 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 17 Jul 2008 18:47:20 +1000 Subject: [PATCH 12/90] Only decrement the "number of persistent writes in flight" If/when it is >0 or we will break if used against an unpatched samba server (This used to be ctdb commit 52a38487f981fd5981c02a7a063ad2c598591c10) --- ctdb/server/ctdb_persistent.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 66311a9f890..455ccba4b05 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -89,7 +89,9 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store to a client. 
Returning error\n")); return -1; } - client->num_persistent_updates--; + if (client->num_persistent_updates > 0) { + client->num_persistent_updates--; + } state = talloc_zero(ctdb, struct ctdb_persistent_state); CTDB_NO_MEMORY(ctdb, state); @@ -454,7 +456,9 @@ int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb, return -1; } - client->num_persistent_updates--; + if (client->num_persistent_updates > 0) { + client->num_persistent_updates--; + } return 0; } From 68d01c59b2c03aaade45d60c5ea3a25475e19743 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 17 Jul 2008 18:53:54 +1000 Subject: [PATCH 13/90] Allow the fix-to-make-persistent-writes-safer work with unpatched samba versions (This used to be ctdb commit 52716d26eb84104d65828bed38e69f214a5fa824) --- ctdb/packaging/RPM/ctdb.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 84f9cf6c288..6e406192e7b 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 48 +Release: 49 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,9 @@ fi %{_includedir}/ctdb_private.h %changelog +* Thu Jul 17 2008 : Version 1.0.49 + - Update the safe persistent update fix to work with unpatched samba + servers. * Thu Jul 17 2008 : Version 1.0.48 - Update the spec file. 
- Do not start new user-triggered eventscripts if we are already From 8b520bcb5fec99237e9009f39455714f2fd029a0 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 18 Jul 2008 10:38:51 +1000 Subject: [PATCH 14/90] lower a debug message (This used to be ctdb commit 554dcf16d37c8b9e4704df11d21fb272f30f5cec) --- ctdb/server/ctdb_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index aeb0cbde38b..3978e28fdf4 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -53,7 +53,7 @@ static void flag_change_handler(struct ctdb_context *ctdb, uint64_t srvid, ctdb->nodes[c->pnn]->flags = (ctdb->nodes[c->pnn]->flags&NODE_FLAGS_DISCONNECTED) | (c->new_flags & ~NODE_FLAGS_DISCONNECTED); - DEBUG(DEBUG_INFO,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags)); + DEBUG(DEBUG_DEBUG,("Node flags for node %u are now 0x%x\n", c->pnn, ctdb->nodes[c->pnn]->flags)); /* make sure we don't hold any IPs when we shouldn't */ if (c->pnn == ctdb->pnn && From 6d5f96c24911f474a1d288f37271e5dbdd878528 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 18 Jul 2008 10:41:18 +1000 Subject: [PATCH 15/90] lower a debug statement (This used to be ctdb commit 3d58f9b524a40c7b43a2a855212db090e9becefa) --- ctdb/server/ctdb_recoverd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 69d867afdb5..6caf52b6508 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2901,7 +2901,7 @@ again: } - DEBUG(DEBUG_INFO, (__location__ " Update flags on all nodes\n")); + DEBUG(DEBUG_DEBUG, (__location__ " Update flags on all nodes\n")); /* update all nodes to have the same flags that we have */ From 66919db3d7ab1e091223faf515b183af8bfddc83 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 18 Jul 2008 10:59:34 +1000 Subject: [PATCH 16/90] We can not assume that just because we 
could complete a TCP handshake to the remote node that 1, we are in fact talking to a CTDB daemon 2, that IF we are talking to a ctdb daemon, it is operational. So, we can not blindly mark the node as CONNECTED just because we can open a TCP connection. Instead we rely on "If we did get a KEEPALIVE from the remote node, it is connected" (This used to be ctdb commit 60e2cb175c449ae65793a3e1ffb60cf030a3a0d5) --- ctdb/tcp/tcp_connect.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index f3b4f7d3089..906a665d044 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -100,9 +100,6 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f /* the queue subsystem now owns this fd */ tnode->fd = -1; - - /* tell the ctdb layer we are connected */ - node->ctdb->upcalls->node_connected(node); } From d0707c98c07b7d643afbe025f58500a462af69b8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 18 Jul 2008 12:07:25 +1000 Subject: [PATCH 17/90] if a new node enters the cluster, that node will already be frozen at start but the rest of the nodes are not frozen. at this stage an election is called by the new node. Since in this case the nodes are not frozen, we can not modify the recmaster of the nodes so it is expected that this control would fail. Add a boolean to send_election_request() to make it not try to set the recmaster locally for the case where we are in an election phase while not frozen.
(This used to be ctdb commit c5035657606283d2e35bea40992505e84ca8e7be) --- ctdb/server/ctdb_recoverd.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 6caf52b6508..b2533afc9cb 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -1654,7 +1654,7 @@ static bool ctdb_election_win(struct ctdb_recoverd *rec, struct election_message /* send out an election request */ -static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn) +static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn, bool update_recmaster) { int ret; TDB_DATA election_data; @@ -1670,19 +1670,26 @@ static int send_election_request(struct ctdb_recoverd *rec, uint32_t pnn) election_data.dptr = (unsigned char *)&emsg; - /* first we assume we will win the election and set - recoverymaster to be ourself on the current node - */ - ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), pnn, pnn); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request\n")); - return -1; - } - - /* send an election message to all active nodes */ ctdb_send_message(ctdb, CTDB_BROADCAST_ALL, srvid, election_data); + + /* A new node that is already frozen has entered the cluster. 
+ The existing nodes are not frozen and dont need to be frozen + until the election has ended and we start the actual recovery + */ + if (update_recmaster == true) { + /* first we assume we will win the election and set + recoverymaster to be ourself on the current node + */ + ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), pnn, pnn); + if (ret != 0) { + DEBUG(DEBUG_ERR, (__location__ " failed to send recmaster election request\n")); + return -1; + } + } + + return 0; } @@ -1720,7 +1727,7 @@ static void election_send_request(struct event_context *ev, struct timed_event * struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd); int ret; - ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb)); + ret = send_election_request(rec, ctdb_get_pnn(rec->ctdb), false); if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to send election request!\n")); } @@ -1856,7 +1863,7 @@ static void force_election(struct ctdb_recoverd *rec, uint32_t pnn, timeval_current_ofs(ctdb->tunable.election_timeout, 0), ctdb_election_timeout, rec); - ret = send_election_request(rec, pnn); + ret = send_election_request(rec, pnn, true); if (ret!=0) { DEBUG(DEBUG_ERR, (__location__ " failed to initiate recmaster election")); return; From 87e19f1f9d2a51aeb0f0128149aa13e7d74458f5 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 18 Jul 2008 13:46:01 +1000 Subject: [PATCH 18/90] fixed a bug where we would look for a signal past the end of the signal arrays This could cause the events code to get into a loop chewing CPU (This used to be ctdb commit e24152fbd06ba4c2b6cfd473751c7f00a676b9ae) --- ctdb/lib/events/events_signal.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ctdb/lib/events/events_signal.c b/ctdb/lib/events/events_signal.c index 2f6d4993832..128a9bc0d03 100644 --- a/ctdb/lib/events/events_signal.c +++ b/ctdb/lib/events/events_signal.c @@ -46,15 +46,15 @@ struct sigcounter { the poor design of signals means that this table must be static global 
*/ static struct sig_state { - struct signal_event *sig_handlers[NUM_SIGNALS]; - struct sigaction *oldact[NUM_SIGNALS]; - struct sigcounter signal_count[NUM_SIGNALS]; + struct signal_event *sig_handlers[NUM_SIGNALS+1]; + struct sigaction *oldact[NUM_SIGNALS+1]; + struct sigcounter signal_count[NUM_SIGNALS+1]; struct sigcounter got_signal; int pipe_hack[2]; #ifdef SA_SIGINFO /* with SA_SIGINFO we get quite a lot of info per signal */ - siginfo_t *sig_info[NUM_SIGNALS]; - struct sigcounter sig_blocked[NUM_SIGNALS]; + siginfo_t *sig_info[NUM_SIGNALS+1]; + struct sigcounter sig_blocked[NUM_SIGNALS+1]; #endif } *sig_state; From 3637add42bc7f8268d117beaa8cfc7baced10d1d Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 18 Jul 2008 13:49:05 +1000 Subject: [PATCH 19/90] new version 1.0.50 (This used to be ctdb commit b08a988fbdad0da850c9b79791c1a8970555147f) --- ctdb/packaging/RPM/ctdb.spec | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 6e406192e7b..1ac41298be8 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 49 +Release: 50 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,16 @@ fi %{_includedir}/ctdb_private.h %changelog +* Fri Jul 18 2008 : Version 1.0.50 + - Dont assume that just because we can establish a TCP connection + that we are actually talking to a functioning ctdb daemon. + So dont mark the node as CONNECTED just because the tcp handshake + was successful. + - Dont try to set the recmaster to ourself during elections for those + cases we know this will fail. To remove some annoying benign but scary + looking entries from the log. + - Bugfix for eventsystem for signal handling that could cause a node to + hang. 
* Thu Jul 17 2008 : Version 1.0.49 - Update the safe persistent update fix to work with unpatched samba servers. From 1bfcca524df5a3cd7b5414a47eb6f0a457459c4a Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 22 Jul 2008 09:07:42 +1000 Subject: [PATCH 20/90] From Michael Adams, change one element from private to private_data Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 0de79352c9b36c118e36905f08ebbe38ecbb957e) --- ctdb/include/ctdb_private.h | 2 +- ctdb/server/ctdb_recoverd.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 66e77093126..77d10926603 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -1098,7 +1098,7 @@ struct ctdb_client_call_state { struct ctdb_call *call; struct { void (*fn)(struct ctdb_client_call_state *); - void *private; + void *private_data; } async; }; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index b2533afc9cb..64a05a777e7 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -775,7 +775,7 @@ static void vacuum_fetch_next(struct vacuum_info *v); */ static void vacuum_fetch_callback(struct ctdb_client_call_state *state) { - struct vacuum_info *v = talloc_get_type(state->async.private, struct vacuum_info); + struct vacuum_info *v = talloc_get_type(state->async.private_data, struct vacuum_info); talloc_free(state); vacuum_fetch_next(v); } @@ -841,7 +841,7 @@ static void vacuum_fetch_next(struct vacuum_info *v) return; } state->async.fn = vacuum_fetch_callback; - state->async.private = v; + state->async.private_data = v; return; } From 79793708a446c16f096e592545d66aae640d00dd Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 23 Jul 2008 15:25:52 +1000 Subject: [PATCH 21/90] fixed buffering in ctdb logging code to handle multiple lines correctly (This used to be ctdb commit e8ef9891aa31c374921b23cc74e1eda1f8218bf0) --- ctdb/server/ctdb_logging.c | 39 
+++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/ctdb/server/ctdb_logging.c b/ctdb/server/ctdb_logging.c index 6ebc8c17810..f551088b00d 100644 --- a/ctdb/server/ctdb_logging.c +++ b/ctdb/server/ctdb_logging.c @@ -138,38 +138,39 @@ static void ctdb_log_handler(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private) { struct ctdb_context *ctdb = talloc_get_type(private, struct ctdb_context); - int n1, n2; char *p; + int n; if (!(flags & EVENT_FD_READ)) { return; } - n1 = read(ctdb->log->pfd, &ctdb->log->buf[ctdb->log->buf_used], + n = read(ctdb->log->pfd, &ctdb->log->buf[ctdb->log->buf_used], sizeof(ctdb->log->buf) - ctdb->log->buf_used); - if (n1 > 0) { - ctdb->log->buf_used += n1; + if (n > 0) { + ctdb->log->buf_used += n; } - p = memchr(ctdb->log->buf, '\n', ctdb->log->buf_used); - if (!p) { - if (ctdb->log->buf_used == sizeof(ctdb->log->buf)) { - do_debug("%*.*s\n", - (int)ctdb->log->buf_used, (int)ctdb->log->buf_used, ctdb->log->buf); - ctdb->log->buf_used = 0; + while (ctdb->log->buf_used > 0 && + (p = memchr(ctdb->log->buf, '\n', ctdb->log->buf_used)) != NULL) { + int n1 = (p - ctdb->log->buf)+1; + int n2 = n1 - 1; + /* swallow \r from child processes */ + if (n2 > 0 && ctdb->log->buf[n2-1] == '\r') { + n2--; } - return; + do_debug("%*.*s\n", n2, n2, ctdb->log->buf); + memmove(ctdb->log->buf, p+1, sizeof(ctdb->log->buf) - n1); + ctdb->log->buf_used -= n1; } - n1 = (p - ctdb->log->buf)+1; - n2 = n1 - 1; - /* swallow \r from child processes */ - if (n2 > 0 && ctdb->log->buf[n2-1] == '\r') { - n2--; + /* the buffer could have completely filled - unfortunately we have + no choice but to dump it out straight away */ + if (ctdb->log->buf_used == sizeof(ctdb->log->buf)) { + do_debug("%*.*s\n", + (int)ctdb->log->buf_used, (int)ctdb->log->buf_used, ctdb->log->buf); + ctdb->log->buf_used = 0; } - do_debug("%*.*s\n", n2, n2, ctdb->log->buf); - memmove(ctdb->log->buf, p+1, sizeof(ctdb->log->buf) - 
n1); - ctdb->log->buf_used -= n1; } From 4eac51341c68ed7f07061874911fb1b784ebd39d Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 23 Jul 2008 15:35:46 +1000 Subject: [PATCH 22/90] allow for probing of directories without raising an error (This used to be ctdb commit 8fed021d11160b137f4140ea02947347250e2959) --- ctdb/config/functions | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index d15c4b582d0..20325b1812e 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -143,6 +143,22 @@ ctdb_check_rpc() { } } +###################################################### +# check a set of directories is available +# return 0 on a missing directory +# usage: ctdb_check_directories_probe SERVICE_NAME +###################################################### +ctdb_check_directories_probe() { + service_name="$1" + shift + wait_dirs="$*" + [ -z "$wait_dirs" ] && return; + for d in $wait_dirs; do + [ -d $d ] || return 1 + done + return 0 +} + ###################################################### # check a set of directories is available # usage: ctdb_check_directories SERVICE_NAME @@ -151,13 +167,10 @@ ctdb_check_directories() { service_name="$1" shift wait_dirs="$*" - [ -z "$wait_dirs" ] && return; - for d in $wait_dirs; do - [ -d $d ] || { - echo "ERROR: $service_name directory $d not available" - exit 1 - } - done + ctdb_check_directories_probe "$service_name" $wait_dirs || { + echo "ERROR: $service_name directory $d not available" + exit 1 + } } ###################################################### From 71f0641ddabab5a87799fbf285f0328df4456031 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 23 Jul 2008 15:36:23 +1000 Subject: [PATCH 23/90] run the testparm commands in 50.samba in the background, only running in the foreground if something fails (This used to be ctdb commit b1fed105ad780e89a128a611ef0bd659818eeebf) --- ctdb/config/events.d/50.samba | 96 
+++++++++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 10 deletions(-) diff --git a/ctdb/config/events.d/50.samba b/ctdb/config/events.d/50.samba index 9aa21e2ece0..498aa1704ba 100755 --- a/ctdb/config/events.d/50.samba +++ b/ctdb/config/events.d/50.samba @@ -17,10 +17,81 @@ shift SAMBA_CLEANUP_PERIOD=10 } +# we keep a cached copy of smb.conf here +smbconf_cache="$CTDB_BASE/state/samba/smb.conf.cache" + + +############################################# +# update the smb.conf cache in the foreground +testparm_foreground_update() { + mkdir -p "$CTDB_BASE/state/samba" || exit 1 + testparm -s 2> /dev/null | egrep -v 'registry.shares.=|include.=' > "$smbconf_cache" +} + +############################################# +# update the smb.conf cache in the background +testparm_background_update() { + # if the cache doesn't exist, then update in the foreground + [ -f $smbconf_cache ] || { + testparm_foreground_update + } + # otherwise do a background update + ( + tmpfile="${smbconf_cache}.$$" + testparm -s > $tmpfile 2> /dev/null & + # remember the pid of the teamparm process + pid="$!" + # give it 10 seconds to run + timeleft=10 + while [ $timeleft -gt 0 ]; do + timeleft=$(($timeleft - 1)) + # see if the process still exists + kill -0 $pid > /dev/null 2>&1 || { + # it doesn't exist, grab its exit status + wait $pid + [ $? = 0 ] || { + echo "50.samba: smb.conf background update exited with status $?" 
+ rm -f "${tmpfile}" + exit 1 + } + # put the new smb.conf contents in the cache (atomic rename) + # make sure we remove references to the registry while doing + # this to ensure that running testparm on the cache does + # not use the registry + egrep -v 'registry.shares.=|include.=' < "$tmpfile" > "${tmpfile}.2" + rm -f "$tmpfile" + mv -f "${tmpfile}.2" "$smbconf_cache" || { + echo "50.samba: failed to update background cache" + rm -f "${tmpfile}.2" + exit 1 + } + exit 0 + } + # keep waiting for testparm to finish + sleep 1 + done + # it took more than 10 seconds - kill it off + rm -f "${tmpfile}" + kill -9 "$pid" > /dev/null 2>&1 + echo "50.samba: timed out updating smbconf cache in background" + exit 1 + ) & +} + +################################################## +# show the testparm output using a cached smb.conf +# to avoid registry access +testparm_cat() { + [ -f $smbconf_cache ] || { + testparm_foreground_update + } + testparm -s "$smbconf_cache" "$@" 2>/dev/null +} + # function to see if ctdb manages winbind check_ctdb_manages_winbind() { [ -z "$CTDB_MANAGES_WINBIND" ] && { - secmode=`testparm -s --parameter-name=security 2> /dev/null` + secmode=`testparm_cat --parameter-name=security` case $secmode in ADS|DOMAIN) CTDB_MANAGES_WINBIND="yes"; @@ -108,21 +179,26 @@ case $cmd in touch $CTDB_BASE/state/samba/periodic_cleanup } - [ "$CTDB_SAMBA_SKIP_CONF_CHECK" != "yes" ] && { - testparm -s 2>&1 | egrep '^WARNING|^ERROR|^Unknown' && { - echo "ERROR: testparm shows smb.conf is not clean" - exit 1 - } + testparm_background_update + + testparm_cat | egrep '^WARNING|^ERROR|^Unknown' && { + testparm_foreground_update + testparm_cat | egrep '^WARNING|^ERROR|^Unknown' && { + echo "ERROR: testparm shows smb.conf is not clean" + exit 1 + } } - [ "$CTDB_SAMBA_SKIP_SHARE_CHECK" != "yes" ] && { - smb_dirs=`testparm -s 2> /dev/null | egrep '^[[:space:]]*path = ' | cut -d= -f2` - ctdb_check_directories "Samba" $smb_dirs + smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' 
| cut -d= -f2` + ctdb_check_directories_probe "Samba" $smb_dirs || { + testparm_foreground_update + smb_dirs=`testparm_cat | egrep '^[[:space:]]*path = ' | cut -d= -f2` + ctdb_check_directories "Samba" $smb_dirs } smb_ports="$CTDB_SAMBA_CHECK_PORTS" [ -z "$smb_ports" ] && { - smb_ports=`testparm -s --parameter-name="smb ports" 2> /dev/null` + smb_ports=`testparm_cat --parameter-name="smb ports"` } ctdb_check_tcp_ports "Samba" $smb_ports From 78beb279662b479f8d185f621ef7457b96245752 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 28 Jul 2008 17:07:44 +1000 Subject: [PATCH 24/90] From Alexander Saupp. If we use vlan tagging and bonding we must strip the vlan part off the name so we can check the main bonde device for status. I.e. check bond0 instead of bond0. (This used to be ctdb commit 795c190b004d404b84dda053593139ed51d345e5) --- ctdb/config/events.d/10.interface | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/config/events.d/10.interface b/ctdb/config/events.d/10.interface index 8283a3f20d9..15e55618d15 100755 --- a/ctdb/config/events.d/10.interface +++ b/ctdb/config/events.d/10.interface @@ -147,6 +147,7 @@ case $cmd in for IFACE in $INTERFACES ; do case $IFACE in bond*) + IFACE=`echo $IFACE |sed -e 's/\....$//'` grep '^MII Status: up' /proc/net/bonding/$IFACE > /dev/null || { echo "ERROR: public network interface $IFACE is down" exit 1 From e8c1785f2b7314a99193e58d679980377b4f4bfe Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 28 Jul 2008 17:11:15 +1000 Subject: [PATCH 25/90] new version 1.0.51 (This used to be ctdb commit ded1a974cdd86b436c6e5cba27069d1a3796dbe9) --- ctdb/packaging/RPM/ctdb.spec | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 1ac41298be8..bff072b60f8 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 50 
+Release: 51 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,13 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Jul 28 2008 : Version 1.0.51 + - Strip off the vlan tag from bond devices before we check in /proc + if the interface is up or not. + - Use testparm in the background in the scripts to allow probing + that the shares do exist. + - Fix a bug in the logging code to handle multiline entries better + - Rename private elements from private to private_data * Fri Jul 18 2008 : Version 1.0.50 - Dont assume that just because we can establish a TCP connection that we are actually talking to a functioning ctdb daemon. From 9c10147fc145a5512c1c6531149682e88d80d4d3 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 13:20:24 +1000 Subject: [PATCH 26/90] - show pids during test - don't use first_time, as it is not safe for multiple clients on a node (This used to be ctdb commit 22f737be0e70fc043affaa4f953f60d852b7999a) --- ctdb/tests/ctdb_persistent.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ctdb/tests/ctdb_persistent.c b/ctdb/tests/ctdb_persistent.c index 7bd4ab87698..c95ab9357ac 100644 --- a/ctdb/tests/ctdb_persistent.c +++ b/ctdb/tests/ctdb_persistent.c @@ -57,7 +57,7 @@ static void each_second(struct event_context *ev, struct timed_event *te, uint32_t *old_counters; - printf("Counters: "); + printf("[%4u] Counters: ", getpid()); old_counters = (uint32_t *)old_data.dptr; for (i=0;i Date: Wed, 30 Jul 2008 13:20:47 +1000 Subject: [PATCH 27/90] - cleanup persistent db at start - catch SIGINT and kill daemons (This used to be ctdb commit d3f5d75665a78ae0081fda57e58384b27a6ae396) --- ctdb/tests/fetch.sh | 2 ++ ctdb/tests/persistent.sh | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/ctdb/tests/fetch.sh b/ctdb/tests/fetch.sh index f4c820a83f1..a0753215c7f 100755 --- a/ctdb/tests/fetch.sh +++ b/ctdb/tests/fetch.sh @@ -5,6 +5,8 @@ if [ $# -gt 0 ]; 
then NUMNODES=$1 fi +trap 'echo "Killing test"; killall -9 -q ctdbd ctdb_fetch; exit 1' INT TERM + tests/start_daemons.sh $NUMNODES || exit 1 diff --git a/ctdb/tests/persistent.sh b/ctdb/tests/persistent.sh index 327f8bcce9a..112e9fde2a7 100755 --- a/ctdb/tests/persistent.sh +++ b/ctdb/tests/persistent.sh @@ -5,14 +5,19 @@ if [ $# -gt 0 ]; then NUMNODES=$1 fi +killall -9 -q ctdb_persistent ctdbd + +rm -rf test.db/persistent echo "Starting $NUMNODES daemons for SAFE persistent writes" tests/start_daemons.sh $NUMNODES || exit 1 -killall -9 -q ctdb_persistent +trap 'echo "Killing test"; killall -9 -q ctdbd ctdb_persistent; exit 1' INT TERM + for i in `seq 1 $NUMNODES`; do $VALGRIND bin/ctdb_persistent --timelimit 30 --socket sock.$i $* & + $VALGRIND bin/ctdb_persistent --timelimit 30 --socket sock.$i $* & done wait @@ -29,6 +34,7 @@ killall -9 -q ctdb_persistent for i in `seq 1 $NUMNODES`; do $VALGRIND bin/ctdb_persistent --unsafe-writes --timelimit 30 --socket sock.$i $* & + $VALGRIND bin/ctdb_persistent --unsafe-writes --timelimit 30 --socket sock.$i $* & done wait From bf32a8e6c3d769624c6a0be65d7756a367555c93 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 13:21:02 +1000 Subject: [PATCH 28/90] cleanup on SIGINT (This used to be ctdb commit 008533d971aec9c28c6e4750ef4677dd943633ff) --- ctdb/tests/run_tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ctdb/tests/run_tests.sh b/ctdb/tests/run_tests.sh index 356a9b21a04..3e1caf43a22 100755 --- a/ctdb/tests/run_tests.sh +++ b/ctdb/tests/run_tests.sh @@ -1,5 +1,7 @@ #!/bin/sh +trap 'echo "Killing test"; killall -9 -q ctdbd; exit 1' INT TERM + tests/fetch.sh 4 || exit 1 tests/bench.sh 4 || exit 1 tests/ctdbd.sh || exit 1 From abe02328180d40a60278bd0473237f01e1fdeb14 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 14:24:56 +1000 Subject: [PATCH 29/90] rename the structure we use for marshalling multiple records (This used to be ctdb commit 
4d205476d286570a6e1f52b59af42858ce051106) --- ctdb/include/ctdb_private.h | 4 ++-- ctdb/server/ctdb_recover.c | 20 ++++++++++---------- ctdb/server/ctdb_recoverd.c | 18 +++++++++--------- ctdb/tools/ctdb_vacuum.c | 18 +++++++++--------- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 77d10926603..4d547782c2a 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -1028,8 +1028,8 @@ struct ctdb_control_pulldb { uint32_t lmaster; }; -/* structure used for pulldb control */ -struct ctdb_control_pulldb_reply { +/* structure used for sending lists of records */ +struct ctdb_marshall_buffer { uint32_t db_id; uint32_t count; uint8_t data[1]; diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 7b8d28c58a8..2b7489849ce 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -258,7 +258,7 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT struct ctdb_control_pulldb *pull; struct ctdb_db_context *ctdb_db; struct pulldb_data params; - struct ctdb_control_pulldb_reply *reply; + struct ctdb_marshall_buffer *reply; if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_pull_db when not frozen\n")); @@ -273,14 +273,14 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT return -1; } - reply = talloc_zero(outdata, struct ctdb_control_pulldb_reply); + reply = talloc_zero(outdata, struct ctdb_marshall_buffer); CTDB_NO_MEMORY(ctdb, reply); reply->db_id = pull->db_id; params.ctdb = ctdb; params.pulldata = reply; - params.len = offsetof(struct ctdb_control_pulldb_reply, data); + params.len = offsetof(struct ctdb_marshall_buffer, data); params.failed = false; if (ctdb_lock_all_databases_mark(ctdb) != 0) { @@ -308,7 +308,7 @@ int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DAT */ int32_t ctdb_control_push_db(struct 
ctdb_context *ctdb, TDB_DATA indata) { - struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr; + struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr; struct ctdb_db_context *ctdb_db; int i, ret; struct ctdb_rec_data *rec; @@ -318,7 +318,7 @@ int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata) return -1; } - if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) { + if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) { DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n")); return -1; } @@ -887,13 +887,13 @@ int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outda */ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata) { - struct ctdb_control_pulldb_reply *reply = (struct ctdb_control_pulldb_reply *)indata.dptr; + struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr; struct ctdb_db_context *ctdb_db; int i; struct ctdb_rec_data *rec; - struct ctdb_control_pulldb_reply *records; + struct ctdb_marshall_buffer *records; - if (indata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) { + if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) { DEBUG(DEBUG_ERR,(__location__ " invalid data in try_delete_records\n")); return -1; } @@ -910,9 +910,9 @@ int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA inda /* create a blob to send back the records we couldnt delete */ - records = (struct ctdb_control_pulldb_reply *) + records = (struct ctdb_marshall_buffer *) talloc_zero_size(outdata, - offsetof(struct ctdb_control_pulldb_reply, data)); + offsetof(struct ctdb_marshall_buffer, data)); if (records == NULL) { DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); return -1; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 64a05a777e7..2bc24dd7496 100644 --- a/ctdb/server/ctdb_recoverd.c +++ 
b/ctdb/server/ctdb_recoverd.c @@ -546,9 +546,9 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode, return -1; } - reply = (struct ctdb_control_pulldb_reply *)outdata.dptr; + reply = (struct ctdb_marshall_buffer *)outdata.dptr; - if (outdata.dsize < offsetof(struct ctdb_control_pulldb_reply, data)) { + if (outdata.dsize < offsetof(struct ctdb_marshall_buffer, data)) { DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n")); talloc_free(tmp_ctx); return -1; @@ -764,7 +764,7 @@ struct vacuum_info { struct ctdb_recoverd *rec; uint32_t srcnode; struct ctdb_db_context *ctdb_db; - struct ctdb_control_pulldb_reply *recs; + struct ctdb_marshall_buffer *recs; struct ctdb_rec_data *r; }; @@ -866,7 +866,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *private_data) { struct ctdb_recoverd *rec = talloc_get_type(private_data, struct ctdb_recoverd); - struct ctdb_control_pulldb_reply *recs; + struct ctdb_marshall_buffer *recs; int ret, i; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); const char *name; @@ -877,7 +877,7 @@ static void vacuum_fetch_handler(struct ctdb_context *ctdb, uint64_t srvid, uint32_t srcnode; struct vacuum_info *v; - recs = (struct ctdb_control_pulldb_reply *)data.dptr; + recs = (struct ctdb_marshall_buffer *)data.dptr; r = (struct ctdb_rec_data *)&recs->data[0]; if (recs->count == 0) { @@ -1136,7 +1136,7 @@ static struct tdb_wrap *create_recdb(struct ctdb_context *ctdb, TALLOC_CTX *mem_ */ struct recdb_data { struct ctdb_context *ctdb; - struct ctdb_control_pulldb_reply *recdata; + struct ctdb_marshall_buffer *recdata; uint32_t len; bool failed; }; @@ -1184,7 +1184,7 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, struct tdb_wrap *recdb, struct ctdb_node_map *nodemap) { struct recdb_data params; - struct ctdb_control_pulldb_reply *recdata; + struct ctdb_marshall_buffer *recdata; TDB_DATA outdata; TALLOC_CTX *tmp_ctx; uint32_t *nodes; @@ 
-1192,14 +1192,14 @@ static int push_recdb_database(struct ctdb_context *ctdb, uint32_t dbid, tmp_ctx = talloc_new(ctdb); CTDB_NO_MEMORY(ctdb, tmp_ctx); - recdata = talloc_zero(recdb, struct ctdb_control_pulldb_reply); + recdata = talloc_zero(recdb, struct ctdb_marshall_buffer); CTDB_NO_MEMORY(ctdb, recdata); recdata->db_id = dbid; params.ctdb = ctdb; params.recdata = recdata; - params.len = offsetof(struct ctdb_control_pulldb_reply, data); + params.len = offsetof(struct ctdb_marshall_buffer, data); params.failed = false; if (tdb_traverse_read(recdb->tdb, traverse_recdb, &params) == -1) { diff --git a/ctdb/tools/ctdb_vacuum.c b/ctdb/tools/ctdb_vacuum.c index 60a0b0a3ee7..f517c6ea719 100644 --- a/ctdb/tools/ctdb_vacuum.c +++ b/ctdb/tools/ctdb_vacuum.c @@ -39,7 +39,7 @@ struct vacuum_data { struct ctdb_db_context *ctdb_db; trbt_tree_t *delete_tree; uint32_t delete_count; - struct ctdb_control_pulldb_reply **list; + struct ctdb_marshall_buffer **list; bool traverse_error; uint32_t total; }; @@ -153,7 +153,7 @@ static int vacuum_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, } struct delete_records_list { - struct ctdb_control_pulldb_reply *records; + struct ctdb_marshall_buffer *records; }; /* @@ -269,16 +269,16 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb vdata->ctdb_db = ctdb_db; /* the list needs to be of length num_nodes */ - vdata->list = talloc_array(vdata, struct ctdb_control_pulldb_reply *, ctdb->vnn_map->size); + vdata->list = talloc_array(vdata, struct ctdb_marshall_buffer *, ctdb->vnn_map->size); if (vdata->list == NULL) { DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); talloc_free(vdata); return -1; } for (i=0;i<ctdb->vnn_map->size;i++) { - vdata->list[i] = (struct ctdb_control_pulldb_reply *) + vdata->list[i] = (struct ctdb_marshall_buffer *) talloc_zero_size(vdata->list, - offsetof(struct ctdb_control_pulldb_reply, data)); + offsetof(struct ctdb_marshall_buffer, data)); if (vdata->list[i] == NULL) {
DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); talloc_free(vdata); @@ -332,9 +332,9 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); return -1; } - recs->records = (struct ctdb_control_pulldb_reply *) + recs->records = (struct ctdb_marshall_buffer *) talloc_zero_size(vdata, - offsetof(struct ctdb_control_pulldb_reply, data)); + offsetof(struct ctdb_marshall_buffer, data)); if (recs->records == NULL) { DEBUG(DEBUG_ERR,(__location__ " Out of memory\n")); return -1; @@ -353,7 +353,7 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb (if possible) */ for (i=0;i<ctdb->vnn_map->size;i++) { - struct ctdb_control_pulldb_reply *records; + struct ctdb_marshall_buffer *records; struct ctdb_rec_data *rec; if (ctdb->vnn_map->map[i] == ctdb->pnn) { @@ -375,7 +375,7 @@ static int ctdb_vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb /* outdata countains the list of records coming back from the node which the node could not delete */ - records = (struct ctdb_control_pulldb_reply *)outdata.dptr; + records = (struct ctdb_marshall_buffer *)outdata.dptr; rec = (struct ctdb_rec_data *)&records->data[0]; while (records->count-- > 1) { TDB_DATA reckey, recdata; From bc1aed395c368473c9095b38247afb2f207d5c25 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:55:54 +1000 Subject: [PATCH 30/90] added a new persistent transaction test program (This used to be ctdb commit d5dcb46e182466e4b51c106f2491178c23babd8d) --- ctdb/Makefile.in | 7 +- ctdb/tests/ctdb_transaction.c | 258 ++++++++++++++++++++++++++++++++++ ctdb/tests/transaction.sh | 28 ++++ 3 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 ctdb/tests/ctdb_transaction.c create mode 100755 ctdb/tests/transaction.sh diff --git a/ctdb/Makefile.in b/ctdb/Makefile.in index 9b5ebe02889..cf1240b1f71 100755 --- a/ctdb/Makefile.in +++ b/ctdb/Makefile.in @@ -56,7 +56,8 @@
CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \ server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.c \ $(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@ -TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent bin/ctdb_traverse bin/rb_test \ +TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_randrec bin/ctdb_persistent \ + bin/ctdb_traverse bin/rb_test bin/ctdb_transaction \ @INFINIBAND_BINS@ BINS = bin/ctdb @CTDB_SCSI_IO@ bin/ctdb_ipmux bin/smnotify @@ -141,6 +142,10 @@ bin/ctdb_persistent: $(CTDB_CLIENT_OBJ) tests/ctdb_persistent.o @echo Linking $@ @$(CC) $(CFLAGS) -o $@ tests/ctdb_persistent.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) +bin/ctdb_transaction: $(CTDB_CLIENT_OBJ) tests/ctdb_transaction.o + @echo Linking $@ + @$(CC) $(CFLAGS) -o $@ tests/ctdb_transaction.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) + bin/ibwrapper_test: $(CTDB_CLIENT_OBJ) ib/ibwrapper_test.o @echo Linking $@ @$(CC) $(CFLAGS) -o $@ ib/ibwrapper_test.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) diff --git a/ctdb/tests/ctdb_transaction.c b/ctdb/tests/ctdb_transaction.c new file mode 100644 index 00000000000..783fa123fdf --- /dev/null +++ b/ctdb/tests/ctdb_transaction.c @@ -0,0 +1,258 @@ +/* + simple tool to test persistent databases + + Copyright (C) Andrew Tridgell 2006-2007 + Copyright (c) Ronnie sahlberg 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/ + +#include "includes.h" +#include "lib/events/events.h" +#include "system/filesys.h" +#include "popt.h" +#include "cmdline.h" + +#include +#include + +static struct timeval tp1,tp2; + +static void start_timer(void) +{ + gettimeofday(&tp1,NULL); +} + +static double end_timer(void) +{ + gettimeofday(&tp2,NULL); + return (tp2.tv_sec + (tp2.tv_usec*1.0e-6)) - + (tp1.tv_sec + (tp1.tv_usec*1.0e-6)); +} + +static int timelimit = 10; + +static unsigned int pnn; + +static TDB_DATA old_data; + +static int success = true; + +static void each_second(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + int i; + uint32_t *old_counters; + + + printf("[%4u] Counters: ", getpid()); + old_counters = (uint32_t *)old_data.dptr; + for (i=0;i Date: Wed, 30 Jul 2008 19:57:00 +1000 Subject: [PATCH 31/90] added new multi-record transaction commit code (This used to be ctdb commit 9ff3380099fe6f4d39de126db0826971a10ee692) --- ctdb/include/ctdb_private.h | 19 ++- ctdb/server/ctdb_control.c | 9 ++ ctdb/server/ctdb_persistent.c | 214 ++++++++++++++++++++++++++-------- 3 files changed, 189 insertions(+), 53 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 4d547782c2a..79046aa7eed 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -430,7 +430,6 @@ struct ctdb_db_context { struct ctdb_registered_call *calls; /* list of registered calls */ uint32_t seqnum; struct timed_event *te; - uint32_t client_tdb_flags; }; @@ -547,6 +546,9 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_GET_CAPABILITIES = 80, CTDB_CONTROL_START_PERSISTENT_UPDATE = 81, CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE= 82, + CTDB_CONTROL_TRANS2_COMMIT = 83, + CTDB_CONTROL_TRANS2_FINISHED = 84, + CTDB_CONTROL_TRANS2_ERROR = 85, }; /* @@ -813,8 +815,6 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx, 
TDB_DATA *data); int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, struct ctdb_ltdb_header *header, TDB_DATA data); -int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, - struct ctdb_ltdb_header *header, TDB_DATA data); int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA recdata); @@ -1121,6 +1121,11 @@ int32_t ctdb_ltdb_set_seqnum_frequency(struct ctdb_context *ctdb, uint32_t frequ struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid, TDB_DATA key, struct ctdb_ltdb_header *, TDB_DATA data); +struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r, + uint32_t *reqid, + struct ctdb_ltdb_header *header, + TDB_DATA *key, TDB_DATA *data); + int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata); int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata); @@ -1308,6 +1313,9 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, int32_t ctdb_control_update_record(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA recdata, bool *async_reply); +int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata, bool *async_reply); int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id); int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id); @@ -1353,4 +1361,9 @@ int ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode); int32_t ctdb_dump_memory(struct ctdb_context *ctdb, TDB_DATA *outdata); int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata); +int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb, + struct ctdb_req_control *c); +int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, + struct 
ctdb_req_control *c); + #endif diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 1ae8e85cacf..3d223299914 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -400,6 +400,15 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_CANCEL_PERSISTENT_UPDATE: return ctdb_control_cancel_persistent_update(ctdb, c, indata); + case CTDB_CONTROL_TRANS2_COMMIT: + return ctdb_control_trans2_commit(ctdb, c, indata, async_reply); + + case CTDB_CONTROL_TRANS2_ERROR: + return ctdb_control_trans2_error(ctdb, c); + + case CTDB_CONTROL_TRANS2_FINISHED: + return ctdb_control_trans2_finished(ctdb, c); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 455ccba4b05..81c623999a8 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -50,6 +50,8 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb, status, errormsg)); state->status = status; state->errormsg = errormsg; + DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); + ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; } state->num_pending--; if (state->num_pending == 0) { @@ -67,19 +69,21 @@ static void ctdb_persistent_store_timeout(struct event_context *ev, struct timed struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state); ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_state"); + DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); + state->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; talloc_free(state); } /* - store a persistent record - called from a ctdb client when it has updated - a record in a persistent database. The client will have the record + store a set of persistent records - called from a ctdb client when it has updated + some records in a persistent database. 
The client will have the record locked for the duration of this call. The client is the dmaster when this call is made */ -int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, - struct ctdb_req_control *c, - TDB_DATA recdata, bool *async_reply) +int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata, bool *async_reply) { struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); struct ctdb_persistent_state *state; @@ -89,8 +93,26 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, DEBUG(DEBUG_ERR,(__location__ " can not match persistent_store to a client. Returning error\n")); return -1; } - if (client->num_persistent_updates > 0) { - client->num_persistent_updates--; + + /* handling num_persistent_updates is a bit strange - + there are 3 cases + 1) very old clients, which never called CTDB_CONTROL_START_PERSISTENT_UPDATE + They don't expect num_persistent_updates to be used at all + + 2) less old clients, which uses CTDB_CONTROL_START_PERSISTENT_UPDATE, and expected + this commit to then decrement it + + 3) new clients which use TRANS2 commit functions, and + expect this function to increment the counter, and + then have it decremented in ctdb_control_trans2_error + or ctdb_control_trans2_finished + */ + if (c->opcode == CTDB_CONTROL_PERSISTENT_STORE) { + if (client->num_persistent_updates > 0) { + client->num_persistent_updates--; + } + } else { + client->num_persistent_updates++; } state = talloc_zero(ctdb, struct ctdb_persistent_state); @@ -147,10 +169,7 @@ int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, struct ctdb_persistent_write_state { struct ctdb_db_context *ctdb_db; - TDB_DATA key; - TDB_DATA data; - struct ctdb_ltdb_header *header; - struct tdb_context *tdb; + struct ctdb_marshall_buffer *m; struct ctdb_req_control *c; }; @@ -160,32 +179,65 @@ struct ctdb_persistent_write_state { */ static int ctdb_persistent_store(struct 
ctdb_persistent_write_state *state) { - struct ctdb_ltdb_header oldheader; - int ret; + int ret, i; + struct ctdb_rec_data *rec = NULL; + struct ctdb_marshall_buffer *m = state->m; - /* fetch the old header and ensure the rsn is less than the new rsn */ - ret = ctdb_ltdb_fetch(state->ctdb_db, state->key, &oldheader, NULL, NULL); - if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n", - state->ctdb_db->db_id)); + ret = tdb_transaction_start(state->ctdb_db->ltdb->tdb); + if (ret == -1) { + DEBUG(DEBUG_ERR,("Failed to start transaction for db_id 0x%08x in ctdb_persistent_store\n", + state->ctdb_db->db_id)); return -1; } - if (oldheader.rsn >= state->header->rsn) { - DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n", - state->ctdb_db->db_id, - (unsigned long long)oldheader.rsn, (unsigned long long)state->header->rsn)); - return -1; + for (i=0;icount;i++) { + struct ctdb_ltdb_header oldheader; + struct ctdb_ltdb_header header; + TDB_DATA key, data; + + rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data); + + if (rec == NULL) { + DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n", + i, state->ctdb_db->db_id)); + goto failed; + } + + /* fetch the old header and ensure the rsn is less than the new rsn */ + ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, NULL, NULL); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n", + state->ctdb_db->db_id)); + goto failed; + } + + if (oldheader.rsn >= header.rsn) { + DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n", + state->ctdb_db->db_id, + (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn)); + goto failed; + } + + ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data); + if (ret != 0) { + 
DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n", + state->ctdb_db->db_id)); + return -1; + } } - ret = ctdb_ltdb_persistent_store(state->ctdb_db, state->key, state->header, state->data); - if (ret != 0) { - DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n", - state->ctdb_db->db_id)); + ret = tdb_transaction_commit(state->ctdb_db->ltdb->tdb); + if (ret == -1) { + DEBUG(DEBUG_ERR,("Failed to commit transaction for db_id 0x%08x in ctdb_persistent_store\n", + state->ctdb_db->db_id)); return -1; } return 0; + +failed: + tdb_transaction_cancel(state->ctdb_db->ltdb->tdb); + return -1; } @@ -357,20 +409,19 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA recdata, bool *async_reply) { - struct ctdb_rec_data *rec = (struct ctdb_rec_data *)&recdata.dptr[0]; struct ctdb_db_context *ctdb_db; - uint32_t db_id = rec->reqid; struct ctdb_persistent_write_state *state; struct childwrite_handle *handle; + struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr; if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) { DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_update_record when recovery active\n")); return -1; } - ctdb_db = find_ctdb_db(ctdb, db_id); + ctdb_db = find_ctdb_db(ctdb, m->db_id); if (ctdb_db == NULL) { - DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", db_id)); + DEBUG(DEBUG_ERR,("Unknown database 0x%08x in ctdb_control_update_record\n", m->db_id)); return -1; } @@ -379,23 +430,7 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb, state->ctdb_db = ctdb_db; state->c = c; - state->tdb = ctdb_db->ltdb->tdb; - state->key.dptr = &rec->data[0]; - state->key.dsize = rec->keylen; - state->data.dptr = &rec->data[rec->keylen]; - state->data.dsize = rec->datalen; - - if (state->data.dsize < sizeof(struct ctdb_ltdb_header)) { - DEBUG(DEBUG_CRIT,("Invalid data size %u in ctdb_control_update_record\n", - 
(unsigned)state->data.dsize)); - talloc_free(state); - return -1; - } - - state->header = (struct ctdb_ltdb_header *)&state->data.dptr[0]; - state->data.dptr += sizeof(struct ctdb_ltdb_header); - state->data.dsize -= sizeof(struct ctdb_ltdb_header); - + state->m = m; /* create a child process to take out a transaction and write the data. @@ -421,8 +456,49 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb, } +/* + called when a client has finished a local commit in a transaction to + a persistent database + */ +int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb, + struct ctdb_req_control *c) +{ + struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); + + if (client->num_persistent_updates == 0) { + DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n")); + return -1; + } + client->num_persistent_updates--; + + return 0; +} /* + called when a client gets an error committing its database + during a transaction commit + */ +int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, + struct ctdb_req_control *c) +{ + struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); + + if (client->num_persistent_updates == 0) { + DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n")); + return -1; + } + client->num_persistent_updates--; + + DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); + client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; + + return 0; +} + + +/* + backwards compatibility: + start a persistent store operation. passing both the key, header and data to the daemon. 
If the client disconnects before it has issued a persistent_update call to the daemon we trigger a full recovery @@ -445,9 +521,14 @@ int32_t ctdb_control_start_persistent_update(struct ctdb_context *ctdb, return 0; } +/* + backwards compatibility: + + called to tell ctdbd that it is no longer doing a persistent update +*/ int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb, - struct ctdb_req_control *c, - TDB_DATA recdata) + struct ctdb_req_control *c, + TDB_DATA recdata) { struct ctdb_client *client = ctdb_reqid_find(ctdb, c->client_id, struct ctdb_client); @@ -462,3 +543,36 @@ int32_t ctdb_control_cancel_persistent_update(struct ctdb_context *ctdb, return 0; } + + +/* + backwards compatibility: + + single record varient of ctdb_control_trans2_commit for older clients + */ +int32_t ctdb_control_persistent_store(struct ctdb_context *ctdb, + struct ctdb_req_control *c, + TDB_DATA recdata, bool *async_reply) +{ + struct ctdb_marshall_buffer *m; + struct ctdb_rec_data *rec = (struct ctdb_rec_data *)recdata.dptr; + TDB_DATA key, data; + + if (recdata.dsize != offsetof(struct ctdb_rec_data, data) + + rec->keylen + rec->datalen) { + DEBUG(DEBUG_ERR, (__location__ " Bad data size in recdata\n")); + return -1; + } + + key.dptr = &rec->data[0]; + key.dsize = rec->keylen; + data.dptr = &rec->data[rec->keylen]; + data.dsize = rec->datalen; + + m = ctdb_marshall_add(c, NULL, rec->reqid, rec->reqid, key, NULL, data); + CTDB_NO_MEMORY(ctdb, m); + + return ctdb_control_trans2_commit(ctdb, c, ctdb_marshall_finish(m), async_reply); +} + + From dfd3c530806da164088eb2c000db53cde9614992 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:57:48 +1000 Subject: [PATCH 32/90] added client side functions for new transaction code (This used to be ctdb commit aab710f1c6bcdfd8ff2992f8adc15933276dc39e) --- ctdb/client/ctdb_client.c | 197 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/ctdb/client/ctdb_client.c 
b/ctdb/client/ctdb_client.c index 544f5d1c383..5004a69b0e9 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -1787,6 +1787,11 @@ static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA return; } + if (data.dsize == sizeof(struct ctdb_ltdb_header)) { + /* empty records are deleted records in ctdb */ + return; + } + if (state->fn(ctdb, key, data, state->private_data) != 0) { state->done = True; } @@ -2948,3 +2953,195 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, talloc_free(tmp_ctx); return ret; } + +struct ctdb_transaction_handle { + struct ctdb_db_context *ctdb_db; + struct ctdb_marshall_buffer *m; +}; + +/* start a transaction on a database */ +static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h) +{ + tdb_transaction_cancel(h->ctdb_db->ltdb->tdb); + return 0; +} + +/* start a transaction on a database */ +struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db, + TALLOC_CTX *mem_ctx) +{ + struct ctdb_record_handle *rh; + struct ctdb_transaction_handle *h; + TDB_DATA key; + struct ctdb_ltdb_header header; + TALLOC_CTX *tmp_ctx; + const char *keyname = CTDB_TRANSACTION_LOCK_KEY; + int ret; + + key.dptr = discard_const(keyname); + key.dsize = strlen(keyname); + + if (!ctdb_db->persistent) { + DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n")); + return NULL; + } + +again: + tmp_ctx = talloc_new(mem_ctx); + + rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL); + if (rh == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n")); + talloc_free(tmp_ctx); + return NULL; + } + talloc_free(rh); + + ret = tdb_transaction_start(ctdb_db->ltdb->tdb); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n")); + talloc_free(tmp_ctx); + return NULL; + } + + ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL); + if (ret != 0 || header.dmaster != 
ctdb_db->ctdb->pnn) { + tdb_transaction_cancel(ctdb_db->ltdb->tdb); + talloc_free(tmp_ctx); + goto again; + } + + talloc_free(tmp_ctx); + + /* we have a good transaction */ + h = talloc_zero(mem_ctx, struct ctdb_transaction_handle); + if (h == NULL) { + tdb_transaction_cancel(ctdb_db->ltdb->tdb); + DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n")); + return NULL; + } + + h->ctdb_db = ctdb_db; + + talloc_set_destructor(h, ctdb_transaction_destructor); + + return h; +} + + + +/* + fetch a record inside a transaction + */ +int ctdb_transaction_fetch(struct ctdb_transaction_handle *h, + TALLOC_CTX *mem_ctx, + TDB_DATA key, TDB_DATA *data) +{ + struct ctdb_ltdb_header header; + int ret; + + ZERO_STRUCT(header); + + ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data); + if (ret == -1 && header.dmaster == (uint32_t)-1) { + /* record doesn't exist yet */ + *data = tdb_null; + return 0; + } + return ret; +} + +/* + stores a record inside a transaction + */ +int ctdb_transaction_store(struct ctdb_transaction_handle *h, + TDB_DATA key, TDB_DATA data) +{ + TALLOC_CTX *tmp_ctx = talloc_new(h); + struct ctdb_ltdb_header header; + int ret; + + ZERO_STRUCT(header); + + /* we need the header so we can update the RSN */ + ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, NULL); + if (ret == -1 && header.dmaster == (uint32_t)-1) { + /* the record doesn't exist - create one with us as dmaster. 
+ This is only safe because we are in a transaction and this + is a persistent database */ + header.dmaster = h->ctdb_db->ctdb->pnn; + header.rsn = 0; + } else if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n")); + talloc_free(tmp_ctx); + return ret; + } + + header.rsn++; + + h->m = ctdb_marshall_add(h, h->m, h->ctdb_db->db_id, 0, key, &header, data); + if (h->m == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); + talloc_free(tmp_ctx); + return -1; + } + + ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data); + + talloc_free(tmp_ctx); + + return ret; +} + +/* + commit a transaction + */ +int ctdb_transaction_commit(struct ctdb_transaction_handle *h) +{ + int ret; + int32_t status; + struct ctdb_context *ctdb = h->ctdb_db->ctdb; + + talloc_set_destructor(h, NULL); + + if (h->m == NULL) { + /* no changes were made */ + talloc_free(h); + return 0; + } + + /* tell ctdbd to commit to the other nodes */ + ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + CTDB_CONTROL_TRANS2_COMMIT, 0, + ctdb_marshall_finish(h->m), NULL, NULL, &status, NULL, NULL); + if (ret != 0 || status != 0) { + DEBUG(DEBUG_ERR,(__location__ " Control failed for remote transaction commit\n")); + talloc_free(h); + return -1; + } + + /* do the real commit locally */ + ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n")); + ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL, NULL, NULL, NULL); + talloc_free(h); + return ret; + } + + /* tell ctdbd that we are finished with our local commit */ + ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + CTDB_CONTROL_TRANS2_FINISHED, 0, + tdb_null, NULL, NULL, &status, NULL, NULL); + if (ret != 0 || status != 0) { + DEBUG(DEBUG_ERR,(__location__ " Control failed to finish transaction commit\n")); + 
talloc_free(h); + return -1; + } + + + talloc_free(h); + return 0; +} From 5672c421d1aab60c987cc594b01d3ba47139635b Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:58:03 +1000 Subject: [PATCH 33/90] we don't need ctdb_ltdb_persistent_store() any more (This used to be ctdb commit 2bc7f3aef4668bd1680db87ef215c349280a84f2) --- ctdb/common/ctdb_ltdb.c | 59 ----------------------------------------- 1 file changed, 59 deletions(-) diff --git a/ctdb/common/ctdb_ltdb.c b/ctdb/common/ctdb_ltdb.c index f3604584fbe..12fcf52d959 100644 --- a/ctdb/common/ctdb_ltdb.c +++ b/ctdb/common/ctdb_ltdb.c @@ -157,65 +157,6 @@ int ctdb_ltdb_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, return ret; } -/* - write a record to a persistent database - this is done by a child process -*/ -int ctdb_ltdb_persistent_store(struct ctdb_db_context *ctdb_db, TDB_DATA key, - struct ctdb_ltdb_header *header, TDB_DATA data) -{ - struct ctdb_context *ctdb = ctdb_db->ctdb; - TDB_DATA rec; - int ret; - - if (ctdb->flags & CTDB_FLAG_TORTURE) { - struct ctdb_ltdb_header *h2; - rec = tdb_fetch(ctdb_db->ltdb->tdb, key); - h2 = (struct ctdb_ltdb_header *)rec.dptr; - if (rec.dptr && rec.dsize >= sizeof(h2) && h2->rsn > header->rsn) { - DEBUG(DEBUG_CRIT,("RSN regression! 
%llu %llu\n", - (unsigned long long)h2->rsn, (unsigned long long)header->rsn)); - } - if (rec.dptr) free(rec.dptr); - } - - rec.dsize = sizeof(*header) + data.dsize; - rec.dptr = talloc_size(ctdb, rec.dsize); - CTDB_NO_MEMORY(ctdb, rec.dptr); - - memcpy(rec.dptr, header, sizeof(*header)); - memcpy(rec.dptr + sizeof(*header), data.dptr, data.dsize); - - /* if this is a persistent database without NOSYNC then we - will do this via a transaction */ - if (!(ctdb_db->client_tdb_flags & TDB_NOSYNC)) { - ret = tdb_transaction_start(ctdb_db->ltdb->tdb); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Failed to start local transaction\n")); - goto failed; - } - ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Failed to store persistent data\n")); - tdb_transaction_cancel(ctdb_db->ltdb->tdb); - goto failed; - } - ret = tdb_transaction_commit(ctdb_db->ltdb->tdb); - if (ret != 0) { - DEBUG(DEBUG_ERR, (__location__ " Failed to commit persistent store transaction.\n")); - tdb_transaction_cancel(ctdb_db->ltdb->tdb); - goto failed; - } - } else { - ret = tdb_store(ctdb_db->ltdb->tdb, key, rec, TDB_REPLACE); - } - -failed: - talloc_free(rec.dptr); - - return ret; -} - /* lock a record in the ltdb, given a key */ From b8e93a9233da5e356f105567c69e0a6cde063a86 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:58:17 +1000 Subject: [PATCH 34/90] added marshalling helper functions (This used to be ctdb commit 12087e7d751a8756076662cd8db5dcf35316c0c5) --- ctdb/common/ctdb_util.c | 100 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 1b0988d3a74..b096a52946b 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -200,6 +200,106 @@ struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid, return d; } + +/* helper function for marshalling multiple records */ +struct 
ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx, + struct ctdb_marshall_buffer *m, + uint64_t db_id, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data) +{ + struct ctdb_rec_data *r; + size_t m_size, r_size; + struct ctdb_marshall_buffer *m2; + + r = ctdb_marshall_record(mem_ctx, reqid, key, header, data); + if (r == NULL) { + talloc_free(m); + return NULL; + } + + if (m == NULL) { + m = talloc_zero_size(mem_ctx, offsetof(struct ctdb_marshall_buffer, data)); + if (m == NULL) { + return NULL; + } + m->db_id = db_id; + } + + m_size = talloc_get_size(m); + r_size = talloc_get_size(r); + + m2 = talloc_realloc_size(mem_ctx, m, m_size + r_size); + if (m2 == NULL) { + talloc_free(m); + return NULL; + } + + memcpy(m_size + (uint8_t *)m2, r, r_size); + + talloc_free(r); + + m2->count++; + + return m2; +} + +/* we've finished marshalling, return a data blob with the marshalled records */ +TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m) +{ + TDB_DATA data; + data.dptr = (uint8_t *)m; + data.dsize = talloc_get_size(m); + return data; +} + +/* + loop over a marshalling buffer + + - pass r==NULL to start + - loop the number of times indicated by m->count +*/ +struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r, + uint32_t *reqid, + struct ctdb_ltdb_header *header, + TDB_DATA *key, TDB_DATA *data) +{ + if (r == NULL) { + r = (struct ctdb_rec_data *)&m->data[0]; + } else { + r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r); + } + + if (reqid != NULL) { + *reqid = r->reqid; + } + + if (key != NULL) { + key->dptr = &r->data[0]; + key->dsize = r->keylen; + } + if (data != NULL) { + data->dptr = &r->data[r->keylen]; + data->dsize = r->datalen; + if (header != NULL) { + data->dptr += sizeof(*header); + data->dsize -= sizeof(*header); + } + } + + if (header != NULL) { + if (r->datalen < sizeof(*header)) { + return NULL; + } + *header = *(struct ctdb_ltdb_header 
*)&r->data[r->keylen]; + } + + return r; +} + + #if HAVE_SCHED_H #include #endif From 237e2f5409f0de2e3ab916072cdff7eedad882ee Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:58:27 +1000 Subject: [PATCH 35/90] new prototypes (This used to be ctdb commit 71d9d24abae62f70acbd7c1ded8af0b817607c2a) --- ctdb/include/ctdb.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index 985b78f7d50..c6f257433b3 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -106,6 +106,8 @@ struct ctdb_call_info { /* send a broadcast to all connected nodes */ #define CTDB_BROADCAST_CONNECTED 0xF0000004 +/* the key used for transaction locking on persistent databases */ +#define CTDB_TRANSACTION_LOCK_KEY "__transaction_lock__" enum control_state {CTDB_CONTROL_WAIT, CTDB_CONTROL_DONE, CTDB_CONTROL_ERROR, CTDB_CONTROL_TIMEOUT}; @@ -546,4 +548,23 @@ struct ctdb_client_control_state *ctdb_ctrl_getcapabilities_send(struct ctdb_con int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities); +struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx, + struct ctdb_marshall_buffer *m, + uint64_t db_id, + uint32_t reqid, + TDB_DATA key, + struct ctdb_ltdb_header *header, + TDB_DATA data); +TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m); + +struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db, + TALLOC_CTX *mem_ctx); +int ctdb_transaction_fetch(struct ctdb_transaction_handle *h, + TALLOC_CTX *mem_ctx, + TDB_DATA key, TDB_DATA *data); +int ctdb_transaction_store(struct ctdb_transaction_handle *h, + TDB_DATA key, TDB_DATA data); +int ctdb_transaction_commit(struct ctdb_transaction_handle *h); + + #endif From ca3eaf87e1129a8ee3160a064a3fec39e8883774 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:58:49 +1000 Subject: [PATCH 36/90] make 
sure we honor the TDB_NOSYNC flag from clients in the server (This used to be ctdb commit 9806d18b93218c216d538e28f9ed495269f0a938) --- ctdb/server/ctdb_ltdb_server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_ltdb_server.c b/ctdb/server/ctdb_ltdb_server.c index a6ad4eefe78..35212506df8 100644 --- a/ctdb/server/ctdb_ltdb_server.c +++ b/ctdb/server/ctdb_ltdb_server.c @@ -330,7 +330,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, if (db) { outdata->dptr = (uint8_t *)&db->db_id; outdata->dsize = sizeof(db->db_id); - db->client_tdb_flags |= tdb_flags; + tdb_add_flags(db->ltdb->tdb, tdb_flags); return 0; } @@ -345,7 +345,7 @@ int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata, } /* remember the flags the client has specified */ - db->client_tdb_flags = tdb_flags; + tdb_add_flags(db->ltdb->tdb, tdb_flags); outdata->dptr = (uint8_t *)&db->db_id; outdata->dsize = sizeof(db->db_id); From cf739ac892d92be149095eee25109f379fb03c02 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:59:18 +1000 Subject: [PATCH 37/90] renamed the pulldb structure to a ctdb_marshall_buffer (This used to be ctdb commit bad53b2d342bb9760497e6f4a61e64ca50d6e771) --- ctdb/server/ctdb_recover.c | 2 +- ctdb/server/ctdb_recoverd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 2b7489849ce..ead04ed0f21 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -219,7 +219,7 @@ ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode) */ struct pulldb_data { struct ctdb_context *ctdb; - struct ctdb_control_pulldb_reply *pulldata; + struct ctdb_marshall_buffer *pulldata; uint32_t len; bool failed; }; diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 2bc24dd7496..2e29feaa279 100644 --- a/ctdb/server/ctdb_recoverd.c +++ 
b/ctdb/server/ctdb_recoverd.c @@ -533,7 +533,7 @@ static int pull_one_remote_database(struct ctdb_context *ctdb, uint32_t srcnode, { int ret; TDB_DATA outdata; - struct ctdb_control_pulldb_reply *reply; + struct ctdb_marshall_buffer *reply; struct ctdb_rec_data *rec; int i; TALLOC_CTX *tmp_ctx = talloc_new(recdb); From 5e969d7a6c6deb5ab9ea0f4a532296ccfe34d327 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:59:34 +1000 Subject: [PATCH 38/90] cleanup of the old persistent db test (This used to be ctdb commit 306af4ccef132ea023f1f01e11f877a3a742ee4c) --- ctdb/tests/ctdb_persistent.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ctdb/tests/ctdb_persistent.c b/ctdb/tests/ctdb_persistent.c index c95ab9357ac..8f0452abad4 100644 --- a/ctdb/tests/ctdb_persistent.c +++ b/ctdb/tests/ctdb_persistent.c @@ -96,12 +96,9 @@ static void check_counters(struct ctdb_context *ctdb, TDB_DATA data) static void test_store_records(struct ctdb_context *ctdb, struct event_context *ev) { - TDB_DATA key, data; + TDB_DATA key; struct ctdb_db_context *ctdb_db; - TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - int ret; - struct ctdb_record_handle *h; - uint32_t *counters; + ctdb_db = ctdb_db_handle(ctdb, "persistent.tdb"); key.dptr = discard_const("testkey"); @@ -109,6 +106,12 @@ static void test_store_records(struct ctdb_context *ctdb, struct event_context * start_timer(); while (end_timer() < timelimit) { + TDB_DATA data; + TALLOC_CTX *tmp_ctx = talloc_new(ctdb); + struct ctdb_record_handle *h; + int ret; + uint32_t *counters; + h = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, &data); if (h == NULL) { printf("Failed to fetch record '%s' on node %d\n", @@ -150,9 +153,9 @@ static void test_store_records(struct ctdb_context *ctdb, struct event_context * } talloc_free(h); + talloc_free(tmp_ctx); } - talloc_free(tmp_ctx); } /* From 446295f2573f098a3e9d1018050eb68b8f0c9b0b Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:59:42 
+1000 Subject: [PATCH 39/90] fixed a warning (This used to be ctdb commit 3a71844cfdb9fe69208030432ddb547b0e215726) --- ctdb/tests/ctdb_traverse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/tests/ctdb_traverse.c b/ctdb/tests/ctdb_traverse.c index f5ca7159c40..1726b235d43 100644 --- a/ctdb/tests/ctdb_traverse.c +++ b/ctdb/tests/ctdb_traverse.c @@ -27,7 +27,7 @@ #include #include -static char *dbname = "test.tdb"; +static const char *dbname = "test.tdb"; static int traverse_callback(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *private_data) { From ae2b9ee17fd4fed9c755802751d780ab98767786 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 30 Jul 2008 19:59:54 +1000 Subject: [PATCH 40/90] fixed some warnings (This used to be ctdb commit fe55bfc8fb6dc628f72f220843e829a251d09936) --- ctdb/tests/rb_test.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ctdb/tests/rb_test.c b/ctdb/tests/rb_test.c index 536b31c6d64..553a0f6c488 100644 --- a/ctdb/tests/rb_test.c +++ b/ctdb/tests/rb_test.c @@ -188,61 +188,61 @@ int main(int argc, const char *argv[]) trbt_insertarray32_callback(tree, 3, key1, callback, u32array[0]); data = trbt_lookuparray32(tree, 3, key1); - printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key1 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key2); - printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key2 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key3); - printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key3 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key4); - printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key4 dataptr:%p == %d\n",data,data?*data:-1); trbt_traversearray32(tree, 3, traverse, NULL); printf("\ndeleting key4\n"); talloc_free(trbt_lookuparray32(tree, 
3, key4)); data = trbt_lookuparray32(tree, 3, key1); - printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key1 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key2); - printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key2 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key3); - printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key3 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key4); - printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key4 dataptr:%p == %d\n",data,data?*data:-1); trbt_traversearray32(tree, 3, traverse, NULL); printf("\ndeleting key2\n"); talloc_free(trbt_lookuparray32(tree, 3, key2)); data = trbt_lookuparray32(tree, 3, key1); - printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key1 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key2); - printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key2 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key3); - printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key3 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key4); - printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key4 dataptr:%p == %d\n",data,data?*data:-1); trbt_traversearray32(tree, 3, traverse, NULL); printf("\ndeleting key3\n"); talloc_free(trbt_lookuparray32(tree, 3, key3)); data = trbt_lookuparray32(tree, 3, key1); - printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key1 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key2); - printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key2 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key3); - printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + 
printf("key3 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key4); - printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key4 dataptr:%p == %d\n",data,data?*data:-1); trbt_traversearray32(tree, 3, traverse, NULL); printf("\ndeleting key1\n"); talloc_free(trbt_lookuparray32(tree, 3, key1)); data = trbt_lookuparray32(tree, 3, key1); - printf("key1 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key1 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key2); - printf("key2 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key2 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key3); - printf("key3 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key3 dataptr:%p == %d\n",data,data?*data:-1); data = trbt_lookuparray32(tree, 3, key4); - printf("key4 dataptr:0x%08x == %d\n",(int)data,data?*data:-1); + printf("key4 dataptr:%p == %d\n",data,data?*data:-1); trbt_traversearray32(tree, 3, traverse, NULL); talloc_free(tree); From d47fe5f83babab6ca7b0f85020c90f859a0ee338 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 1 Aug 2008 14:17:50 +1000 Subject: [PATCH 41/90] ensure we use killtcp on non-NFS/non-CIFS ports for faster failover of other protocols (This used to be ctdb commit aefcb1f817581ac8cd67712d07159fc802f96623) --- ctdb/config/functions | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 20325b1812e..5df121f7310 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -229,8 +229,16 @@ kill_tcp_connections() { srcport=`echo $src | cut -d: -f2` destip=`echo $dest | cut -d: -f1` destport=`echo $dest | cut -d: -f2` - ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1 echo "Killing TCP connection $srcip:$srcport $destip:$destport" + ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1 + case $destport 
in + # we only do one-way killtcp for NFS and CIFS + 139|445|2049) : ;; + # for all others we do 2-way + *) + ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1 + ;; + esac _killcount=`expr $_killcount + 1` done < $connfile /bin/rm -f $connfile From 8d76f55bfc0806bf339c548e53fd1a142b464e5c Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 1 Aug 2008 14:23:15 +1000 Subject: [PATCH 42/90] we need an additional gratuitous arp before the NFS tickles (This used to be ctdb commit f7a70a5f9043b1d7293a515abf5b5228365693da) --- ctdb/config/events.d/61.nfstickle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ctdb/config/events.d/61.nfstickle b/ctdb/config/events.d/61.nfstickle index 53355951f54..5fa62065bd4 100755 --- a/ctdb/config/events.d/61.nfstickle +++ b/ctdb/config/events.d/61.nfstickle @@ -26,6 +26,9 @@ case $cmd in iface=$1 ip=$2 shopt -s nullglob + # first send a grat arp, to ensure the client knows the updated + # mac address for this IP + ctdb gratiousarp $ip $iface # send tickle acks for all the connections the old server had for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do cat $f | while read dest; do From 78acc59784522f52fe75b9a6dd7784595035a93e Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 4 Aug 2008 14:51:51 +1000 Subject: [PATCH 43/90] implemented replayable transactions in ctdb to prevent deadlock (This used to be ctdb commit b6d9a0396fb4b325778d3810dc656f719f31b9f1) --- ctdb/client/ctdb_client.c | 176 ++++++++++++++++++++++++++++------ ctdb/server/ctdb_persistent.c | 6 +- 2 files changed, 146 insertions(+), 36 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 5004a69b0e9..48eb19d969e 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -2956,7 +2956,12 @@ int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, struct ctdb_transaction_handle { struct ctdb_db_context *ctdb_db; - struct ctdb_marshall_buffer *m; + bool in_replay; 
+ /* we store the reads and writes done under a transaction one + list stores both reads and writes, the other just writes + */ + struct ctdb_marshall_buffer *m_all; + struct ctdb_marshall_buffer *m_write; }; /* start a transaction on a database */ @@ -2967,33 +2972,32 @@ static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h) } /* start a transaction on a database */ -struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db, - TALLOC_CTX *mem_ctx) +static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h) { struct ctdb_record_handle *rh; - struct ctdb_transaction_handle *h; TDB_DATA key; struct ctdb_ltdb_header header; TALLOC_CTX *tmp_ctx; const char *keyname = CTDB_TRANSACTION_LOCK_KEY; int ret; + struct ctdb_db_context *ctdb_db = h->ctdb_db; key.dptr = discard_const(keyname); key.dsize = strlen(keyname); if (!ctdb_db->persistent) { DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n")); - return NULL; + return -1; } again: - tmp_ctx = talloc_new(mem_ctx); + tmp_ctx = talloc_new(h); rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL); if (rh == NULL) { DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n")); talloc_free(tmp_ctx); - return NULL; + return -1; } talloc_free(rh); @@ -3001,7 +3005,7 @@ again: if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n")); talloc_free(tmp_ctx); - return NULL; + return -1; } ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, NULL); @@ -3013,16 +3017,32 @@ again: talloc_free(tmp_ctx); + return 0; +} + + +/* start a transaction on a database */ +struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db, + TALLOC_CTX *mem_ctx) +{ + struct ctdb_transaction_handle *h; + int ret; + /* we have a good transaction */ h = talloc_zero(mem_ctx, struct ctdb_transaction_handle); if (h == NULL) { - tdb_transaction_cancel(ctdb_db->ltdb->tdb); DEBUG(DEBUG_ERR,(__location__ " oom 
for transaction handle\n")); return NULL; } h->ctdb_db = ctdb_db; + ret = ctdb_transaction_fetch_start(h); + if (ret != 0) { + talloc_free(h); + return NULL; + } + talloc_set_destructor(h, ctdb_transaction_destructor); return h; @@ -3046,9 +3066,22 @@ int ctdb_transaction_fetch(struct ctdb_transaction_handle *h, if (ret == -1 && header.dmaster == (uint32_t)-1) { /* record doesn't exist yet */ *data = tdb_null; - return 0; + ret = 0; } - return ret; + + if (ret != 0) { + return ret; + } + + if (!h->in_replay) { + h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data); + if (h->m_all == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); + return -1; + } + } + + return 0; } /* @@ -3079,11 +3112,20 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, header.rsn++; - h->m = ctdb_marshall_add(h, h->m, h->ctdb_db->db_id, 0, key, &header, data); - if (h->m == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); - talloc_free(tmp_ctx); - return -1; + if (!h->in_replay) { + h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data); + if (h->m_all == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); + talloc_free(tmp_ctx); + return -1; + } + + h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data); + if (h->m_write == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); + talloc_free(tmp_ctx); + return -1; + } } ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data); @@ -3093,6 +3135,61 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, return ret; } +/* + replay a transaction + */ +static int ctdb_replay_transaction(struct ctdb_transaction_handle *h) +{ + int ret, i; + struct ctdb_rec_data *rec = NULL; + + h->in_replay = true; + + ret = ctdb_transaction_fetch_start(h); + if (ret != 0) { + return ret; + } + + for (i=0;i<h->m_all->count;i++) {
TDB_DATA key, data; + + rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data); + if (rec == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n")); + goto failed; + } + + if (rec->reqid == 0) { + /* its a store */ + if (ctdb_transaction_store(h, key, data) != 0) { + goto failed; + } + } else { + TDB_DATA data2; + TALLOC_CTX *tmp_ctx = talloc_new(h); + + if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) { + talloc_free(tmp_ctx); + goto failed; + } + if (data2.dsize != data.dsize || + memcmp(data2.dptr, data.dptr, data.dsize) != 0) { + /* the record has changed on us - we have to give up */ + talloc_free(tmp_ctx); + goto failed; + } + talloc_free(tmp_ctx); + } + } + + return 0; + +failed: + tdb_transaction_cancel(h->ctdb_db->ltdb->tdb); + return -1; +} + + /* commit a transaction */ @@ -3101,23 +3198,47 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) int ret; int32_t status; struct ctdb_context *ctdb = h->ctdb_db->ctdb; + struct timeval timeout; talloc_set_destructor(h, NULL); - if (h->m == NULL) { + if (h->m_write == NULL) { /* no changes were made */ talloc_free(h); return 0; } + /* our commit strategy is quite complex. + + - we first try to commit the changes to all other nodes + + - if that works, then we commit locally and we are done + + - if a commit on another node fails, then we need to cancel + the transaction, then restart the transaction (thus + opening a window of time for a pending recovery to + complete), then replay the transaction, checking all the + reads and writes (checking that reads give the same data, + and writes succeed). 
Then we retry the transaction to the + other nodes + */ + +again: /* tell ctdbd to commit to the other nodes */ + timeout = timeval_current_ofs(1, 0); ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, CTDB_CONTROL_TRANS2_COMMIT, 0, - ctdb_marshall_finish(h->m), NULL, NULL, &status, NULL, NULL); + ctdb_marshall_finish(h->m_write), NULL, NULL, &status, + &timeout, NULL); if (ret != 0 || status != 0) { - DEBUG(DEBUG_ERR,(__location__ " Control failed for remote transaction commit\n")); - talloc_free(h); - return -1; + tdb_transaction_cancel(h->ctdb_db->ltdb->tdb); + sleep(1); + if (ctdb_replay_transaction(h) != 0) { + DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n")); + talloc_free(h); + return -1; + } + goto again; } /* do the real commit locally */ @@ -3132,16 +3253,9 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) } /* tell ctdbd that we are finished with our local commit */ - ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, - CTDB_CONTROL_TRANS2_FINISHED, 0, - tdb_null, NULL, NULL, &status, NULL, NULL); - if (ret != 0 || status != 0) { - DEBUG(DEBUG_ERR,(__location__ " Control failed to finish transaction commit\n")); - talloc_free(h); - return -1; - } - - + ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL, &status, NULL, NULL); talloc_free(h); return 0; } diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 81c623999a8..77cff9c6e32 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -50,8 +50,6 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb, status, errormsg)); state->status = status; state->errormsg = errormsg; - DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); - ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; } state->num_pending--; if (state->num_pending == 0) { @@ -69,8 +67,6 @@ static void ctdb_persistent_store_timeout(struct 
event_context *ev, struct timed struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state); ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_state"); - DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); - state->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; talloc_free(state); } @@ -415,7 +411,7 @@ int32_t ctdb_control_update_record(struct ctdb_context *ctdb, struct ctdb_marshall_buffer *m = (struct ctdb_marshall_buffer *)recdata.dptr; if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) { - DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_update_record when recovery active\n")); + DEBUG(DEBUG_INFO,("rejecting ctdb_control_update_record when recovery active\n")); return -1; } From eaaa6e7774e3e465e85a791e3fd7d8a8283746b9 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 1 Aug 2008 14:17:50 +1000 Subject: [PATCH 44/90] ensure we use killtcp on non-NFS/non-CIFS ports for faster failover of other protocols (This used to be ctdb commit 09aa91224fe7b835dc0a2c58868cce28ce54809f) --- ctdb/config/functions | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 20325b1812e..5df121f7310 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -229,8 +229,16 @@ kill_tcp_connections() { srcport=`echo $src | cut -d: -f2` destip=`echo $dest | cut -d: -f1` destport=`echo $dest | cut -d: -f2` - ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1 echo "Killing TCP connection $srcip:$srcport $destip:$destport" + ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1 + case $destport in + # we only do one-way killtcp for NFS and CIFS + 139|445|2049) : ;; + # for all others we do 2-way + *) + ctdb killtcp $destip:$destport $srcip:$srcport >/dev/null 2>&1 || _failed=1 + ;; + esac _killcount=`expr $_killcount + 1` done < $connfile /bin/rm -f $connfile From 
5ac8bf15e20fbdbe7769fd30045ec1b71f2ef431 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 1 Aug 2008 14:23:15 +1000 Subject: [PATCH 45/90] we need an additional gratuitous arp before the NFS tickles (This used to be ctdb commit d1d48f8661d83b01de6c552ee70021acdc6384cb) --- ctdb/config/events.d/61.nfstickle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ctdb/config/events.d/61.nfstickle b/ctdb/config/events.d/61.nfstickle index 53355951f54..5fa62065bd4 100755 --- a/ctdb/config/events.d/61.nfstickle +++ b/ctdb/config/events.d/61.nfstickle @@ -26,6 +26,9 @@ case $cmd in iface=$1 ip=$2 shopt -s nullglob + # first send a grat arp, to ensure the client knows the updated + # mac address for this IP + ctdb gratiousarp $ip $iface # send tickle acks for all the connections the old server had for f in $NFS_TICKLE_SHARED_DIRECTORY/*/$ip; do cat $f | while read dest; do From bb154c8de6801a23e84088462c3c65d3b7e24103 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 4 Aug 2008 14:58:52 +1000 Subject: [PATCH 46/90] New version 1.0.52 Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit a9bce1ac9794f108825190948f404c864a533435) --- ctdb/packaging/RPM/ctdb.spec | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index bff072b60f8..13fae3d28f2 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 51 +Release: 52 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,12 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Aug 4 2008 : Version 1.0.52 + - Send an explicit gratious arp when starting sending the tcp tickles. + - When doing failover, issue a killtcp to non-NFS/non-CIFS clients + so that they fail quickly. NFS and CIFS already fail and recover + quickly. + - Update the test scripts to handle CTRL-C to kill off the test. 
* Mon Jul 28 2008 : Version 1.0.51 - Strip off the vlan tag from bond devices before we check in /proc if the interface is up or not. From b9d8bb23af8abefb2d967e9b4e9d6e60c4a3b520 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 6 Aug 2008 11:52:26 +1000 Subject: [PATCH 47/90] remove the reclock file we store pnn counts in. This file creates additional locking stress on the backend filesystem and we may not need it anyway. (This used to be ctdb commit 84236e03e40bcf46fa634d106903277c149a734f) --- ctdb/client/ctdb_client.c | 23 ------ ctdb/include/ctdb_private.h | 3 +- ctdb/server/ctdb_control.c | 4 - ctdb/server/ctdb_recover.c | 16 ---- ctdb/server/ctdb_recoverd.c | 152 ------------------------------------ ctdb/tools/ctdb.c | 55 ------------- 6 files changed, 1 insertion(+), 252 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 48eb19d969e..d06f8961df2 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -1231,29 +1231,6 @@ int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32 return 0; } -/* - get the reclock filename - */ -int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - TALLOC_CTX *mem_ctx, const char **reclock) -{ - int ret; - TDB_DATA outdata; - int32_t res; - - ret = ctdb_control(ctdb, destnode, 0, - CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null, - mem_ctx, &outdata, &res, &timeout, NULL); - if (ret != 0 || res != 0) { - DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getreclock failed\n")); - return -1; - } - - *reclock = (const char *)talloc_steal(mem_ctx, outdata.dptr); - - return 0; -} - /* get a list of nodes (vnn and flags ) from a remote node */ diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 79046aa7eed..ff4d271794b 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -536,7 +536,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, 
CTDB_CONTROL_START_RECOVERY = 70, CTDB_CONTROL_END_RECOVERY = 71, CTDB_CONTROL_RELOAD_NODES_FILE = 72, - CTDB_CONTROL_GET_RECLOCK_FILE = 73, + /* #73 removed */ CTDB_CONTROL_TRY_DELETE_RECORDS = 74, CTDB_CONTROL_ENABLE_MONITOR = 75, CTDB_CONTROL_DISABLE_MONITOR = 76, @@ -1240,7 +1240,6 @@ int32_t ctdb_control_get_tunable(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata); int32_t ctdb_control_set_tunable(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_list_tunables(struct ctdb_context *ctdb, TDB_DATA *outdata); -int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata); int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata); int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata); diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 3d223299914..59b0657f898 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -378,10 +378,6 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_END_RECOVERY: return ctdb_control_end_recovery(ctdb, c, async_reply); - case CTDB_CONTROL_GET_RECLOCK_FILE: - CHECK_CONTROL_DATA_SIZE(0); - return ctdb_control_get_reclock_file(ctdb, outdata); - case CTDB_CONTROL_TRY_DELETE_RECORDS: return ctdb_control_try_delete_records(ctdb, indata, outdata); diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index ead04ed0f21..f0b97acdeed 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -865,22 +865,6 @@ int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb, return 0; } -/* - report the location for the reclock file - */ -int32_t ctdb_control_get_reclock_file(struct ctdb_context *ctdb, TDB_DATA *outdata) -{ - char *reclock = NULL; - - reclock = talloc_strdup(outdata, ctdb->recovery_lock_file); - CTDB_NO_MEMORY(ctdb, 
reclock); - - outdata->dsize = strlen(reclock)+1; - outdata->dptr = (uint8_t *)reclock; - - return 0; -} - /* try to delete all these records as part of the vacuuming process and return the records we failed to delete diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 2e29feaa279..329522991d3 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -41,7 +41,6 @@ struct ban_state { */ struct ctdb_recoverd { struct ctdb_context *ctdb; - int rec_file_fd; uint32_t recmaster; uint32_t num_active; uint32_t num_connected; @@ -2143,148 +2142,6 @@ static enum monitor_result verify_recmaster(struct ctdb_recoverd *rec, struct ct return status; } -/* - this function writes the number of connected nodes we have for this pnn - to the pnn slot in the reclock file -*/ -static void -ctdb_recoverd_write_pnn_connect_count(struct ctdb_recoverd *rec) -{ - const char count = rec->num_connected; - struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context); - - if (rec->rec_file_fd == -1) { - DEBUG(DEBUG_CRIT,(__location__ " Unable to write pnn count. pnnfile is not open.\n")); - return; - } - - if (pwrite(rec->rec_file_fd, &count, 1, ctdb->pnn) == -1) { - DEBUG(DEBUG_CRIT, (__location__ " Failed to write pnn count\n")); - close(rec->rec_file_fd); - rec->rec_file_fd = -1; - } -} - -/* - this function opens the reclock file and sets a byterage lock for the single - byte at position pnn+1. - the existence/non-existence of such a lock provides an alternative mechanism - to know whether a remote node(recovery daemon) is running or not. 
-*/ -static void -ctdb_recoverd_get_pnn_lock(struct ctdb_recoverd *rec) -{ - struct ctdb_context *ctdb = talloc_get_type(rec->ctdb, struct ctdb_context); - struct flock lock; - char *pnnfile = NULL; - - DEBUG(DEBUG_INFO, ("Setting PNN lock for pnn:%d\n", ctdb->pnn)); - - if (rec->rec_file_fd != -1) { - close(rec->rec_file_fd); - rec->rec_file_fd = -1; - } - - pnnfile = talloc_asprintf(rec, "%s.pnn", ctdb->recovery_lock_file); - CTDB_NO_MEMORY_FATAL(ctdb, pnnfile); - - rec->rec_file_fd = open(pnnfile, O_RDWR|O_CREAT, 0600); - if (rec->rec_file_fd == -1) { - DEBUG(DEBUG_CRIT,(__location__ " Unable to open %s - (%s)\n", - pnnfile, strerror(errno))); - talloc_free(pnnfile); - return; - } - - set_close_on_exec(rec->rec_file_fd); - lock.l_type = F_WRLCK; - lock.l_whence = SEEK_SET; - lock.l_start = ctdb->pnn; - lock.l_len = 1; - lock.l_pid = 0; - - if (fcntl(rec->rec_file_fd, F_SETLK, &lock) != 0) { - close(rec->rec_file_fd); - rec->rec_file_fd = -1; - DEBUG(DEBUG_CRIT,(__location__ " Failed to get pnn lock on '%s'\n", pnnfile)); - talloc_free(pnnfile); - return; - } - - - DEBUG(DEBUG_NOTICE,(__location__ " Got pnn lock on '%s'\n", pnnfile)); - talloc_free(pnnfile); - - /* we start out with 0 connected nodes */ - ctdb_recoverd_write_pnn_connect_count(rec); -} - -/* - called when we need to do the periodical reclock pnn count update - */ -static void ctdb_update_pnn_count(struct event_context *ev, struct timed_event *te, - struct timeval t, void *p) -{ - int i, count; - struct ctdb_recoverd *rec = talloc_get_type(p, struct ctdb_recoverd); - struct ctdb_context *ctdb = rec->ctdb; - struct ctdb_node_map *nodemap = rec->nodemap; - - /* close and reopen the pnn lock file */ - ctdb_recoverd_get_pnn_lock(rec); - - ctdb_recoverd_write_pnn_connect_count(rec); - - event_add_timed(rec->ctdb->ev, rec->ctdb, - timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0), - ctdb_update_pnn_count, rec); - - /* check if there is a split cluster and yeld the recmaster role - it the other 
half of the cluster is larger - */ - DEBUG(DEBUG_DEBUG, ("CHECK FOR SPLIT CLUSTER\n")); - if (rec->nodemap == NULL) { - return; - } - if (rec->rec_file_fd == -1) { - return; - } - /* only test this if we think we are the recmaster */ - if (ctdb->pnn != rec->recmaster) { - DEBUG(DEBUG_DEBUG, ("We are not recmaster, skip test\n")); - return; - } - if (ctdb->recovery_lock_fd == -1) { - DEBUG(DEBUG_ERR, (__location__ " Lost reclock pnn file. Yielding recmaster role\n")); - close(ctdb->recovery_lock_fd); - ctdb->recovery_lock_fd = -1; - force_election(rec, ctdb->pnn, rec->nodemap); - return; - } - for (i=0; i<nodemap->num; i++) { - /* we dont need to check ourself */ - if (nodemap->nodes[i].pnn == ctdb->pnn) { - continue; - } - /* dont check nodes that are connected to us */ - if (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) { - continue; - } - /* check if the node is "connected" and how connected it it */ - count = ctdb_read_pnn_lock(rec->rec_file_fd, nodemap->nodes[i].pnn); - if (count < 0) { - continue; - } - /* check if that node is more connected that us */ - if (count > rec->num_connected) { - DEBUG(DEBUG_ERR, ("DISCONNECTED Node %u is more connected than we are, yielding recmaster role\n", nodemap->nodes[i].pnn)); - close(ctdb->recovery_lock_fd); - ctdb->recovery_lock_fd = -1; - force_election(rec, ctdb->pnn, rec->nodemap); - return; - } - } -} /* called to check that the allocation of public ip addresses is ok.
*/ @@ -2419,10 +2276,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) rec->priority_time = timeval_current(); - /* open the rec file fd and lock our slot */ - rec->rec_file_fd = -1; - ctdb_recoverd_get_pnn_lock(rec); - /* register a message port for sending memory dumps */ ctdb_set_message_handler(ctdb, CTDB_SRVID_MEM_DUMP, mem_dump_handler, rec); @@ -2441,11 +2294,6 @@ static void monitor_cluster(struct ctdb_context *ctdb) /* register a message port for vacuum fetch */ ctdb_set_message_handler(ctdb, CTDB_SRVID_VACUUM_FETCH, vacuum_fetch_handler, rec); - /* update the reclock pnn file connected count on a regular basis */ - event_add_timed(ctdb->ev, ctdb, - timeval_current_ofs(ctdb->tunable.reclock_ping_period, 0), - ctdb_update_pnn_count, rec); - again: if (mem_ctx) { talloc_free(mem_ctx); diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 22671a7a75f..4a3aa870217 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1490,60 +1490,6 @@ static int control_getdbmap(struct ctdb_context *ctdb, int argc, const char **ar return 0; } -/* - get the filename of the reclock file - */ -static int control_getreclock(struct ctdb_context *ctdb, int argc, const char **argv) -{ - int i, ret, fd; - const char *reclock; - struct ctdb_node_map *nodemap=NULL; - char *pnnfile; - - ret = ctdb_ctrl_getreclock(ctdb, TIMELIMIT(), options.pnn, ctdb, &reclock); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to get reclock file from node %u\n", options.pnn)); - return ret; - } - - ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); - return ret; - } - - - pnnfile = talloc_asprintf(ctdb, "%s.pnn", reclock); - CTDB_NO_MEMORY(ctdb, pnnfile); - - fd = open(pnnfile, O_RDONLY); - if (fd == -1) { - DEBUG(DEBUG_CRIT,(__location__ " Failed to open reclock pnn file %s - (%s)\n", - pnnfile, strerror(errno))); - exit(10); - } - - - printf("Reclock file : %s\n", 
reclock); - for (i=0; i<nodemap->num; i++) { - int count; - - count = ctdb_read_pnn_lock(fd, nodemap->nodes[i].pnn); - - printf("pnn:%d %-16s", nodemap->nodes[i].pnn, - inet_ntoa(nodemap->nodes[i].sin.sin_addr)); - if (count == -1) { - printf(" NOT ACTIVE\n"); - } else { - printf(" ACTIVE with %d connections\n", count); - } - } - - close(fd); - return 0; -} - - /* check if the local node is recmaster or not it will return 1 if this node is the recmaster and 0 if it is not @@ -2030,7 +1976,6 @@ static const struct { { "repack", ctdb_repack, false, "repack all databases", "[max_freelist]"}, { "listnodes", control_listnodes, false, "list all nodes in the cluster"}, { "reloadnodes", control_reload_nodes_file, false, "reload the nodes file and restart the transport on all nodes"}, - { "getreclock", control_getreclock, false, "get the path to the reclock file" }, { "moveip", control_moveip, false, "move/failover an ip address to another node", " "}, { "addip", control_addip, true, "add a ip address to a node", " "}, { "delip", control_delip, false, "delete an ip address from a node", ""}, From bbedba23c73f3c537783608c95830db0fab545ea Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Thu, 7 Aug 2008 13:34:18 +1000 Subject: [PATCH 48/90] cover some corner cases where the persistent database could become inconsistent (This used to be ctdb commit c76c214be401cb116265ed17ffe6c77c979ded82) --- ctdb/client/ctdb_client.c | 3 +++ ctdb/server/ctdb_persistent.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 48eb19d969e..cec16d1f9b2 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3235,6 +3235,9 @@ again: sleep(1); if (ctdb_replay_transaction(h) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n")); + ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL, NULL, NULL, NULL);
talloc_free(h); return -1; } diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 77cff9c6e32..6a8a10072c7 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -463,6 +463,8 @@ int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb, if (client->num_persistent_updates == 0) { DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n")); + DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); + client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; return -1; } client->num_persistent_updates--; @@ -481,9 +483,9 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, if (client->num_persistent_updates == 0) { DEBUG(DEBUG_ERR, (__location__ " ERROR: num_persistent_updates == 0\n")); - return -1; + } else { + client->num_persistent_updates--; } - client->num_persistent_updates--; DEBUG(DEBUG_ERR,(__location__ " Forcing recovery\n")); client->ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE; From 2ffa38ad59aedbb8cff6e1958e90b1552ec10ecd Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 7 Aug 2008 18:57:24 +1000 Subject: [PATCH 49/90] new version 1.0.53 this adds completely new transaction code for persistent databases (This used to be ctdb commit bcbac6724840cdabe55e38a4c62779f853bd09ba) --- ctdb/packaging/RPM/ctdb.spec | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 13fae3d28f2..fcaf2eec83f 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 52 +Release: 53 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,9 @@ fi %{_includedir}/ctdb_private.h %changelog +* Thu Aug 7 2008 : Version 1.0.53 + - Remove the reclock.pnn file it can cause gpfs to fail to umount + - New transaction code * Mon Aug 4 2008 : Version 1.0.52 - Send an explicit gratious arp when 
starting sending the tcp tickles. - When doing failover, issue a killtcp to non-NFS/non-CIFS clients From 5ee51ae84e6f14d282d7a859194f1cdb72cbf3ea Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 00:44:33 +1000 Subject: [PATCH 50/90] fixed a looping error bug with the new transactions code (This used to be ctdb commit 0592ba2a4fbd1b3b7a6bd0780eadbd6d449baaad) --- ctdb/client/ctdb_client.c | 7 +++---- ctdb/server/ctdb_persistent.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index cec16d1f9b2..3717096cb91 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3028,7 +3028,6 @@ struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *c struct ctdb_transaction_handle *h; int ret; - /* we have a good transaction */ h = talloc_zero(mem_ctx, struct ctdb_transaction_handle); if (h == NULL) { DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n")); @@ -3200,14 +3199,14 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) struct ctdb_context *ctdb = h->ctdb_db->ctdb; struct timeval timeout; - talloc_set_destructor(h, NULL); - if (h->m_write == NULL) { /* no changes were made */ talloc_free(h); return 0; } + talloc_set_destructor(h, NULL); + /* our commit strategy is quite complex. 
- we first try to commit the changes to all other nodes @@ -3258,7 +3257,7 @@ again: /* tell ctdbd that we are finished with our local commit */ ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY, - tdb_null, NULL, NULL, &status, NULL, NULL); + tdb_null, NULL, NULL, NULL, NULL, NULL); talloc_free(h); return 0; } diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 6a8a10072c7..5b88b4bbed9 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -189,31 +189,39 @@ static int ctdb_persistent_store(struct ctdb_persistent_write_state *state) for (i=0;i<m->count;i++) { struct ctdb_ltdb_header oldheader; struct ctdb_ltdb_header header; - TDB_DATA key, data; + TDB_DATA key, data, olddata; + TALLOC_CTX *tmp_ctx = talloc_new(state); rec = ctdb_marshall_loop_next(m, rec, NULL, &header, &key, &data); if (rec == NULL) { DEBUG(DEBUG_ERR,("Failed to get next record %d for db_id 0x%08x in ctdb_persistent_store\n", i, state->ctdb_db->db_id)); + talloc_free(tmp_ctx); goto failed; } /* fetch the old header and ensure the rsn is less than the new rsn */ - ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, NULL, NULL); + ret = ctdb_ltdb_fetch(state->ctdb_db, key, &oldheader, tmp_ctx, &olddata); if (ret != 0) { DEBUG(DEBUG_ERR,("Failed to fetch old record for db_id 0x%08x in ctdb_persistent_store\n", state->ctdb_db->db_id)); + talloc_free(tmp_ctx); goto failed; } - if (oldheader.rsn >= header.rsn) { + if (oldheader.rsn >= header.rsn && + (olddata.dsize != data.dsize || + memcmp(olddata.dptr, data.dptr, data.dsize) != 0)) { DEBUG(DEBUG_CRIT,("existing header for db_id 0x%08x has larger RSN %llu than new RSN %llu in ctdb_persistent_store\n", state->ctdb_db->db_id, (unsigned long long)oldheader.rsn, (unsigned long long)header.rsn)); + talloc_free(tmp_ctx); goto failed; } + talloc_free(tmp_ctx); + ret = ctdb_ltdb_store(state->ctdb_db, key, &header, data); if (ret != 0) {
DEBUG(DEBUG_CRIT,("Failed to store record for db_id 0x%08x in ctdb_persistent_store\n", From 5a0249d34cbca77b6ce0ea7e1b8fdc91d336ec2f Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 09:58:49 +1000 Subject: [PATCH 51/90] return a more detailed error code from a trans2 commit error (This used to be ctdb commit 6915661a460cd589b441ac7cd8695f35c4e83113) --- ctdb/include/ctdb_private.h | 10 ++++++++++ ctdb/server/ctdb_persistent.c | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index ff4d271794b..6cc1dc9f9c2 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -789,6 +789,16 @@ struct ctdb_req_keepalive { struct ctdb_req_header hdr; }; + +/* types of failures possible from TRANS2_COMMIT */ +enum ctdb_trans2_commit_error { + CTDB_TRANS2_COMMIT_SUCCESS=0, /* all nodes committed successfully */ + CTDB_TRANS2_COMMIT_TIMEOUT=1, /* at least one node timed out */ + CTDB_TRANS2_COMMIT_ALLFAIL=2, /* all nodes failed the commit */ + CTDB_TRANS2_COMMIT_SOMEFAIL=3 /* some nodes failed the commit, some allowed it */ +}; + + /* internal prototypes */ void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) 
PRINTF_ATTRIBUTE(2,3); void ctdb_fatal(struct ctdb_context *ctdb, const char *msg); diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index 5b88b4bbed9..faa6e83029b 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -32,8 +32,16 @@ struct ctdb_persistent_state { const char *errormsg; uint32_t num_pending; int32_t status; + uint32_t num_failed, num_sent; }; +/* + 1) all nodes fail, and all nodes reply + 2) some nodes fail, all nodes reply + 3) some nodes timeout + 4) all nodes succeed + */ + /* called when a node has acknowledged a ctdb_control_update_record call */ @@ -50,10 +58,19 @@ static void ctdb_persistent_callback(struct ctdb_context *ctdb, status, errormsg)); state->status = status; state->errormsg = errormsg; + state->num_failed++; } state->num_pending--; if (state->num_pending == 0) { - ctdb_request_control_reply(state->ctdb, state->c, NULL, state->status, state->errormsg); + enum ctdb_trans2_commit_error etype; + if (state->num_failed == state->num_sent) { + etype = CTDB_TRANS2_COMMIT_ALLFAIL; + } else if (state->num_failed != 0) { + etype = CTDB_TRANS2_COMMIT_SOMEFAIL; + } else { + etype = CTDB_TRANS2_COMMIT_SUCCESS; + } + ctdb_request_control_reply(state->ctdb, state->c, NULL, etype, state->errormsg); talloc_free(state); } } @@ -66,7 +83,8 @@ static void ctdb_persistent_store_timeout(struct event_context *ev, struct timed { struct ctdb_persistent_state *state = talloc_get_type(private_data, struct ctdb_persistent_state); - ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_persistent_state"); + ctdb_request_control_reply(state->ctdb, state->c, NULL, CTDB_TRANS2_COMMIT_TIMEOUT, + "timeout in ctdb_persistent_state"); talloc_free(state); } @@ -141,6 +159,7 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb, } state->num_pending++; + state->num_sent++; } if (state->num_pending == 0) { From e1fe26051b8e4cad34912deee00e42df04c9db79 Mon Sep 17 00:00:00 2001 From: 
Andrew Tridgell Date: Fri, 8 Aug 2008 10:00:33 +1000 Subject: [PATCH 52/90] up release number (This used to be ctdb commit 7ed5fbe7fa3bc3cb729d9b516d2a73d52e28d22d) --- ctdb/packaging/RPM/ctdb.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index fcaf2eec83f..24e78fe8715 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 53 +Release: 53.transactions2 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons From 1da433254cabee505b4cd749a5f002e49f40999a Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 8 Aug 2008 10:01:20 +1000 Subject: [PATCH 53/90] new version 1.0.54 (This used to be ctdb commit a4814aa8b0b165b9d6c4c55fc5aee33cd1a570bd) --- ctdb/packaging/RPM/ctdb.spec | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 24e78fe8715..ea9ffbd1133 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 53.transactions2 +Release: 54 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,10 @@ fi %{_includedir}/ctdb_private.h %changelog +* Fri Aug 8 2008 : Version 1.0.54 + - fix a looping error in the transaction code + - provide a more detailed error code for persistent store errors + so clients can make more intelligent choices on how to try to recover * Thu Aug 7 2008 : Version 1.0.53 - Remove the reclock.pnn file it can cause gpfs to fail to umount - New transaction code From dfdb41350b804eb9284d3cfe939c6f29e2734d9c Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 10:15:23 +1000 Subject: [PATCH 54/90] save writing the same data twice (This used to be ctdb commit 2426b9010ef45f5e96ffc12b8a69a3b0566b4f98) --- 
ctdb/client/ctdb_client.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 2b31d81bcb0..6e1a209a7c2 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3068,12 +3068,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, { TALLOC_CTX *tmp_ctx = talloc_new(h); struct ctdb_ltdb_header header; + TDB_DATA olddata; int ret; ZERO_STRUCT(header); /* we need the header so we can update the RSN */ - ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, NULL); + ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata); if (ret == -1 && header.dmaster == (uint32_t)-1) { /* the record doesn't exist - create one with us as dmaster. This is only safe because we are in a transaction and this @@ -3086,6 +3087,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, return ret; } + if (data.dsize == olddata.dsize && + memcmp(data.dptr, olddata.dptr, data.dsize) == 0) { + /* save writing the same data */ + talloc_free(tmp_ctx); + return 0; + } + header.rsn++; if (!h->in_replay) { @@ -3095,13 +3103,13 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, talloc_free(tmp_ctx); return -1; } - - h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data); - if (h->m_write == NULL) { - DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); - talloc_free(tmp_ctx); - return -1; - } + } + + h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data); + if (h->m_write == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n")); + talloc_free(tmp_ctx); + return -1; } ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data); @@ -3120,6 +3128,8 @@ static int ctdb_replay_transaction(struct ctdb_transaction_handle *h) struct ctdb_rec_data *rec = NULL; h->in_replay = true; + talloc_free(h->m_write); + h->m_write = 
NULL; ret = ctdb_transaction_fetch_start(h); if (ret != 0) { @@ -3176,12 +3186,6 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) struct ctdb_context *ctdb = h->ctdb_db->ctdb; struct timeval timeout; - if (h->m_write == NULL) { - /* no changes were made */ - talloc_free(h); - return 0; - } - talloc_set_destructor(h, NULL); /* our commit strategy is quite complex. @@ -3200,6 +3204,13 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) */ again: + if (h->m_write == NULL) { + /* no changes were made */ + tdb_transaction_cancel(h->ctdb_db->ltdb->tdb); + talloc_free(h); + return 0; + } + /* tell ctdbd to commit to the other nodes */ timeout = timeval_current_ofs(1, 0); ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, From f2fd0d8b59118f856d067b6c48c01c33eb0c447f Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 11:04:21 +1000 Subject: [PATCH 55/90] imported failure handling from dbwrap_ctdb.c (This used to be ctdb commit 7c6b621f7307dc39ffcd7d965ac613642af201b8) --- ctdb/client/ctdb_client.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 6e1a209a7c2..c0875b0368c 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3185,6 +3185,7 @@ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) int32_t status; struct ctdb_context *ctdb = h->ctdb_db->ctdb; struct timeval timeout; + enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR; talloc_set_destructor(h, NULL); @@ -3220,15 +3221,35 @@ again: if (ret != 0 || status != 0) { tdb_transaction_cancel(h->ctdb_db->ltdb->tdb); sleep(1); + + if (ret != 0) { + failure_control = CTDB_CONTROL_TRANS2_ERROR; + } else { + /* work out what error code we will give if we + have to fail the operation */ + switch ((enum ctdb_trans2_commit_error)status) { + case CTDB_TRANS2_COMMIT_SUCCESS: + case CTDB_TRANS2_COMMIT_SOMEFAIL: + case 
CTDB_TRANS2_COMMIT_TIMEOUT: + failure_control = CTDB_CONTROL_TRANS2_ERROR; + break; + case CTDB_TRANS2_COMMIT_ALLFAIL: + failure_control = CTDB_CONTROL_TRANS2_FINISHED; + break; + } + } + if (ctdb_replay_transaction(h) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n")); ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, - CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY, + failure_control, CTDB_CTRL_FLAG_NOREPLY, tdb_null, NULL, NULL, NULL, NULL, NULL); talloc_free(h); return -1; } goto again; + } else { + failure_control = CTDB_CONTROL_TRANS2_ERROR; } /* do the real commit locally */ @@ -3236,7 +3257,7 @@ again: if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n")); ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, - CTDB_CONTROL_TRANS2_ERROR, CTDB_CTRL_FLAG_NOREPLY, + failure_control, CTDB_CTRL_FLAG_NOREPLY, tdb_null, NULL, NULL, NULL, NULL, NULL); talloc_free(h); return ret; From aa1bc0abba40b9a4168126d88516ec3c7cd5c2fc Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 13:11:28 +1000 Subject: [PATCH 56/90] added a new control CTDB_CONTROL_TRANS2_COMMIT_RETRY so we can tell the difference between a initial commit attempt and a retry, which allows us to get the persistent updates counter right for retries (This used to be ctdb commit 7f29c50ccbc7789bfbc20bcb4b65758af9ebe6c5) --- ctdb/include/ctdb_private.h | 1 + ctdb/server/ctdb_control.c | 1 + ctdb/server/ctdb_persistent.c | 12 +++++++++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 6cc1dc9f9c2..f44a940a84e 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -549,6 +549,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_TRANS2_COMMIT = 83, CTDB_CONTROL_TRANS2_FINISHED = 84, CTDB_CONTROL_TRANS2_ERROR = 85, + CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86, }; /* diff --git a/ctdb/server/ctdb_control.c 
b/ctdb/server/ctdb_control.c index 59b0657f898..edfe344fdb9 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -397,6 +397,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_cancel_persistent_update(ctdb, c, indata); case CTDB_CONTROL_TRANS2_COMMIT: + case CTDB_CONTROL_TRANS2_COMMIT_RETRY: return ctdb_control_trans2_commit(ctdb, c, indata, async_reply); case CTDB_CONTROL_TRANS2_ERROR: diff --git a/ctdb/server/ctdb_persistent.c b/ctdb/server/ctdb_persistent.c index faa6e83029b..42b148c95bb 100644 --- a/ctdb/server/ctdb_persistent.c +++ b/ctdb/server/ctdb_persistent.c @@ -121,12 +121,18 @@ int32_t ctdb_control_trans2_commit(struct ctdb_context *ctdb, then have it decremented in ctdb_control_trans2_error or ctdb_control_trans2_finished */ - if (c->opcode == CTDB_CONTROL_PERSISTENT_STORE) { + switch (c->opcode) { + case CTDB_CONTROL_PERSISTENT_STORE: if (client->num_persistent_updates > 0) { client->num_persistent_updates--; - } - } else { + } + break; + case CTDB_CONTROL_TRANS2_COMMIT: client->num_persistent_updates++; + break; + case CTDB_CONTROL_TRANS2_COMMIT_RETRY: + /* already updated from the first commit */ + break; } state = talloc_zero(ctdb, struct ctdb_persistent_state); From 6e86fa34bdd05d4f583391873fc02f549285ebb8 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 13:11:41 +1000 Subject: [PATCH 57/90] added retry handling in client (This used to be ctdb commit b3b9707dd8244758ff1080401a9e03e74766e1ab) --- ctdb/client/ctdb_client.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index c0875b0368c..0d853746aac 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3181,7 +3181,7 @@ failed: */ int ctdb_transaction_commit(struct ctdb_transaction_handle *h) { - int ret; + int ret, retries=0; int32_t status; struct ctdb_context *ctdb = h->ctdb_db->ctdb; struct timeval timeout; 
@@ -3215,7 +3215,7 @@ again: /* tell ctdbd to commit to the other nodes */ timeout = timeval_current_ofs(1, 0); ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, - CTDB_CONTROL_TRANS2_COMMIT, 0, + retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0, ctdb_marshall_finish(h->m_write), NULL, NULL, &status, &timeout, NULL); if (ret != 0 || status != 0) { @@ -3239,6 +3239,16 @@ again: } } + if (++retries == 10) { + DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n", + h->ctdb_db->db_id, retries, (unsigned)failure_control)); + ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, + failure_control, CTDB_CTRL_FLAG_NOREPLY, + tdb_null, NULL, NULL, NULL, NULL, NULL); + talloc_free(h); + return -1; + } + if (ctdb_replay_transaction(h) != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n")); ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id, From 1431210d469f917897d371d6afa6f428e6d82803 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Fri, 8 Aug 2008 22:06:39 +1000 Subject: [PATCH 58/90] fixed send of release IP message (This used to be ctdb commit db6bc3745a56cc12e60e727190a098a6527690d6) --- ctdb/server/ctdb_takeover.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 10369d4b2c0..53f48c175a1 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -304,13 +304,18 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, /* send a message to all clients of this node telling them that the cluster has been reconfigured and they should release any sockets on this IP */ +#if 1 + strncpy(ip, inet_ntoa(state->addr->ip.sin_addr), sizeof(ip)-1); +#else if (inet_ntop(state->addr->sa.sa_family, &state->addr->sa.sa_data[0], ip, sizeof(ip)) == NULL) { DEBUG(DEBUG_ERR, (__location__ " inet_ntop() failed\n")); } - +#endif data.dptr = (uint8_t *)ip; 
data.dsize = strlen(ip)+1; + DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); + ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data); /* kill clients that have registered with this IP */ From 4ec495abca73d667a3ea1988421fb4f5b27d04bd Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 11 Aug 2008 10:33:22 +1000 Subject: [PATCH 59/90] new version 1.0.55 (This used to be ctdb commit 29aaa339ef745208fb0b746db5cbc3a32d0d81b2) --- ctdb/packaging/RPM/ctdb.spec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index ea9ffbd1133..fedb26ea464 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 54 +Release: 55 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,8 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Aug 11 2008: Version 1.0.55 + - Fix the releaseip message we seond to samba. * Fri Aug 8 2008 : Version 1.0.54 - fix a looping error in the transaction code - provide a more detailed error code for persistent store errors From bef21da483ca61f3c785a2809bf75dcd34819ee2 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 11 Aug 2008 10:36:38 +1000 Subject: [PATCH 60/90] fix the date soe rpmbuild works (This used to be ctdb commit c94b295b078dc009c18aa416d0de232a12241014) --- ctdb/packaging/RPM/ctdb.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index fedb26ea464..cb5cd6a0953 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -118,7 +118,7 @@ fi %{_includedir}/ctdb_private.h %changelog -* Mon Aug 11 2008: Version 1.0.55 +* Mon Aug 11 2008 : Version 1.0.55 - Fix the releaseip message we seond to samba. 
* Fri Aug 8 2008 : Version 1.0.54 - fix a looping error in the transaction code From 76528cfc6bffd8db32033e6e77802831ec59d7d2 Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 11 Aug 2008 23:33:05 +1000 Subject: [PATCH 61/90] fixed a memory leak in the recovery daemon thanks to vl for spotting this (This used to be ctdb commit 96df98d9f86ecc6bb1a458eb2101e5c1bc0f96e6) --- ctdb/server/ctdb_recoverd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 329522991d3..c503aa5489a 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2153,7 +2153,7 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn) struct ctdb_uptime *uptime2 = NULL; int ret, j; - ret = ctdb_ctrl_uptime(ctdb, ctdb, CONTROL_TIMEOUT(), + ret = ctdb_ctrl_uptime(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &uptime1); if (ret != 0) { DEBUG(DEBUG_ERR, ("Unable to get uptime from local node %u\n", pnn)); @@ -2169,7 +2169,7 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn) return -1; } - ret = ctdb_ctrl_uptime(ctdb, ctdb, CONTROL_TIMEOUT(), + ret = ctdb_ctrl_uptime(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &uptime2); if (ret != 0) { DEBUG(DEBUG_ERR, ("Unable to get uptime from local node %u\n", pnn)); From 0e3baef65afd080af84799d142659280634bf315 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 11 Aug 2008 23:50:42 +1000 Subject: [PATCH 62/90] new version 1.0.56 (This used to be ctdb commit 196973fef38a9b258e0976d5454161d11d573ddc) --- ctdb/packaging/RPM/ctdb.spec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index cb5cd6a0953..93b9d3fbff3 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 55 +Release: 56 Epoch: 0 License: GNU 
GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,8 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Aug 11 2008 : Version 1.0.56 + - fix a memory leak in the recovery daemon. * Mon Aug 11 2008 : Version 1.0.55 - Fix the releaseip message we seond to samba. * Fri Aug 8 2008 : Version 1.0.54 From b4425e7aa897e54a52212d4eac84b6800a4818dd Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Mon, 11 Aug 2008 23:52:46 +1000 Subject: [PATCH 63/90] up release version (This used to be ctdb commit 7769edbd5507f791e77cc5e5642d854bebec2df0) --- ctdb/packaging/RPM/ctdb.spec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index cb5cd6a0953..67b57c4025a 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 55 +Release: 56 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,8 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Aug 11 2008 : Version 1.0.56 + - Fixed a memory leak in the recovery daemon * Mon Aug 11 2008 : Version 1.0.55 - Fix the releaseip message we seond to samba. 
* Fri Aug 8 2008 : Version 1.0.54 From 65ae40d4a9b8947128f233584b6193f08a3d4ccf Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 13 Aug 2008 22:03:29 +1000 Subject: [PATCH 64/90] Add two new ctdb commands : ctdb backupdb : which will copy a database out from ctdb and write it to a file ctdb restoredb : which will read a database backup from a file and write it into ctdb (This used to be ctdb commit b567e215f5c58d646a392408b9cc1df8ef029b33) --- ctdb/tools/ctdb.c | 270 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 269 insertions(+), 1 deletion(-) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 4a3aa870217..55f1908bb9d 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1751,7 +1751,7 @@ static int control_attach(struct ctdb_context *ctdb, int argc, const char **argv } /* - dump memory usage + run an eventscript on a node */ static int control_eventscript(struct ctdb_context *ctdb, int argc, const char **argv) { @@ -1782,6 +1782,272 @@ static int control_eventscript(struct ctdb_context *ctdb, int argc, const char * return 0; } +struct db_file_header { + unsigned long persistent; + unsigned long size; +}; + +/* + * backup a database to a file + */ +static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **argv) +{ + int i, ret; + struct ctdb_dbid_map *dbmap=NULL; + struct ctdb_node_map *nodemap=NULL; + TALLOC_CTX *tmp_ctx = talloc_new(ctdb); + TDB_DATA outdata; + struct db_file_header dbhdr; + int fh; + uint32_t *nodes; + + if (argc != 2) { + DEBUG(DEBUG_ERR,("Invalid arguments\n")); + return -1; + } + + ret = ctdb_ctrl_getdbmap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &dbmap); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to get dbids from node %u\n", options.pnn)); + return ret; + } + + for(i=0;inum;i++){ + const char *name; + + ctdb_ctrl_getdbname(ctdb, TIMELIMIT(), options.pnn, dbmap->dbs[i].dbid, tmp_ctx, &name); + if(!strcmp(argv[0], name)){ + talloc_free(discard_const(name)); + break; + } + 
talloc_free(discard_const(name)); + } + if (i == dbmap->num) { + DEBUG(DEBUG_ERR,("No database with name '%s' found\n", argv[0])); + talloc_free(tmp_ctx); + return -1; + } + + ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); + talloc_free(tmp_ctx); + return ret; + } + + /* freeze all nodes */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE, + nodes, TIMELIMIT(), + false, tdb_null, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to freeze nodes.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + ret = ctdb_ctrl_pulldb(ctdb, options.pnn, dbmap->dbs[i].dbid, + CTDB_LMASTER_ANY, tmp_ctx, + TIMELIMIT(), &outdata); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " Unable to copy db from node %u\n", options.pnn)); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + /* thaw all nodes */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW, + nodes, TIMELIMIT(), + false, tdb_null, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to thaw nodes.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + fh = open(argv[1], O_RDWR|O_CREAT, 0600); + if (fh == -1) { + DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[1])); + talloc_free(tmp_ctx); + return -1; + } + + dbhdr.persistent = dbmap->dbs[i].persistent; + dbhdr.size = outdata.dsize; + write(fh, &dbhdr, sizeof(dbhdr)); + write(fh, outdata.dptr, outdata.dsize); + + close(fh); + talloc_free(tmp_ctx); + return 0; +} + +/* + * restore a database from a file + */ +static int control_restoredb(struct ctdb_context *ctdb, int argc, const 
char **argv) +{ + int ret; + TALLOC_CTX *tmp_ctx = talloc_new(ctdb); + TDB_DATA outdata; + TDB_DATA data; + struct db_file_header dbhdr; + struct ctdb_db_context *ctdb_db; + struct ctdb_node_map *nodemap=NULL; + struct ctdb_vnn_map *vnnmap=NULL; + int fh; + struct ctdb_control_wipe_database w; + uint32_t *nodes; + uint32_t generation; + + if (argc != 2) { + DEBUG(DEBUG_ERR,("Invalid arguments\n")); + return -1; + } + + fh = open(argv[1], O_RDONLY); + if (fh == -1) { + DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[1])); + talloc_free(tmp_ctx); + return -1; + } + + read(fh, &dbhdr, sizeof(dbhdr)); + outdata.dsize = dbhdr.size; + outdata.dptr = talloc_size(tmp_ctx, outdata.dsize); + if (outdata.dptr == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate data of size '%lu'\n", dbhdr.size)); + close(fh); + talloc_free(tmp_ctx); + return -1; + } + read(fh, outdata.dptr, outdata.dsize); + close(fh); + + + ctdb_db = ctdb_attach(ctdb, argv[0], dbhdr.persistent, 0); + if (ctdb_db == NULL) { + DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", argv[0])); + talloc_free(tmp_ctx); + return -1; + } + + ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); + talloc_free(tmp_ctx); + return ret; + } + + + ret = ctdb_ctrl_getvnnmap(ctdb, TIMELIMIT(), options.pnn, tmp_ctx, &vnnmap); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to get vnnmap from node %u\n", options.pnn)); + talloc_free(tmp_ctx); + return ret; + } + + /* freeze all nodes */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE, + nodes, TIMELIMIT(), + false, tdb_null, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to freeze nodes.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + generation = vnnmap->generation; + data.dptr = (void *)&generation; 
+ data.dsize = sizeof(generation); + + /* start a cluster wide transaction */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_START, + nodes, + TIMELIMIT(), false, data, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to start cluster wide transactions.\n")); + return -1; + } + + + w.db_id = ctdb_db->db_id; + w.transaction_id = generation; + + data.dptr = (void *)&w; + data.dsize = sizeof(w); + + /* wipe all the remote databases. */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_WIPE_DATABASE, + nodes, + TIMELIMIT(), false, data, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to wipe database.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + /* push the database */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_PUSH_DB, + nodes, + TIMELIMIT(), false, outdata, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Failed to push database.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + data.dptr = (void *)&generation; + data.dsize = sizeof(generation); + + /* commit all the changes */ + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_TRANSACTION_COMMIT, + nodes, + TIMELIMIT(), false, data, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to commit databases.\n")); + ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + + /* thaw all nodes */ + nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); + if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW, + nodes, TIMELIMIT(), + false, tdb_null, + NULL, NULL, + NULL) != 0) { + DEBUG(DEBUG_ERR, ("Unable to thaw nodes.\n")); + ctdb_ctrl_setrecmode(ctdb, 
TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + talloc_free(tmp_ctx); + return -1; + } + + + talloc_free(tmp_ctx); + return 0; +} + /* dump memory usage */ @@ -1980,6 +2246,8 @@ static const struct { { "addip", control_addip, true, "add a ip address to a node", " "}, { "delip", control_delip, false, "delete an ip address from a node", ""}, { "eventscript", control_eventscript, true, "run the eventscript with the given parameters on a node", ""}, + { "backupdb", control_backupdb, false, "backup the database into a file.", " "}, + { "restoredb", control_restoredb, false, "restore the database from a file.", " "}, }; /* From 6f5ee6b5cc540a63facafa21014c6d4debc4dd51 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 14 Aug 2008 08:35:19 +1000 Subject: [PATCH 65/90] Encode a file version number in the database backup header Encode the database name in the header so we dont need to provide the database name when doing a restore Encode a timestamp in the header telling us when the backup was created (This used to be ctdb commit 77762170ad1dbc4620565bb898af5d493fac117d) --- ctdb/tools/ctdb.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 55f1908bb9d..7b912af9d66 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1782,9 +1782,14 @@ static int control_eventscript(struct ctdb_context *ctdb, int argc, const char * return 0; } +#define DB_VERSION 1 +#define MAX_DB_NAME 64 struct db_file_header { + unsigned long version; + time_t timestamp; unsigned long persistent; unsigned long size; + const char name[MAX_DB_NAME]; }; /* @@ -1878,8 +1883,16 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar return -1; } + dbhdr.version = DB_VERSION; + dbhdr.timestamp = time(NULL); dbhdr.persistent = dbmap->dbs[i].persistent; dbhdr.size = outdata.dsize; + if (strlen(argv[1]) >= MAX_DB_NAME) { + DEBUG(DEBUG_ERR,("Too long dbname\n")); + 
talloc_free(tmp_ctx); + return -1; + } + strncpy(discard_const(dbhdr.name), argv[1], MAX_DB_NAME); write(fh, &dbhdr, sizeof(dbhdr)); write(fh, outdata.dptr, outdata.dsize); @@ -1905,20 +1918,28 @@ static int control_restoredb(struct ctdb_context *ctdb, int argc, const char **a struct ctdb_control_wipe_database w; uint32_t *nodes; uint32_t generation; + struct tm *tm; + char tbuf[100]; - if (argc != 2) { + if (argc != 1) { DEBUG(DEBUG_ERR,("Invalid arguments\n")); return -1; } - fh = open(argv[1], O_RDONLY); + fh = open(argv[0], O_RDONLY); if (fh == -1) { - DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[1])); + DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[0])); talloc_free(tmp_ctx); return -1; } read(fh, &dbhdr, sizeof(dbhdr)); + if (dbhdr.version != DB_VERSION) { + DEBUG(DEBUG_ERR,("Invalid version of database dump. File is version %lu but expected version was %u\n", dbhdr.version, DB_VERSION)); + talloc_free(tmp_ctx); + return -1; + } + outdata.dsize = dbhdr.size; outdata.dptr = talloc_size(tmp_ctx, outdata.dsize); if (outdata.dptr == NULL) { @@ -1930,10 +1951,15 @@ static int control_restoredb(struct ctdb_context *ctdb, int argc, const char **a read(fh, outdata.dptr, outdata.dsize); close(fh); + tm = localtime(&dbhdr.timestamp); + strftime(tbuf,sizeof(tbuf)-1,"%Y/%m/%d %H:%M:%S", tm); + printf("Restoring database '%s' from backup @ %s\n", + dbhdr.name, tbuf); - ctdb_db = ctdb_attach(ctdb, argv[0], dbhdr.persistent, 0); + + ctdb_db = ctdb_attach(ctdb, dbhdr.name, dbhdr.persistent, 0); if (ctdb_db == NULL) { - DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", argv[0])); + DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", dbhdr.name)); talloc_free(tmp_ctx); return -1; } @@ -2247,7 +2273,7 @@ static const struct { { "delip", control_delip, false, "delete an ip address from a node", ""}, { "eventscript", control_eventscript, true, "run the eventscript with the given parameters on a node", ""}, { "backupdb", control_backupdb, false, "backup the 
database into a file.", " "}, - { "restoredb", control_restoredb, false, "restore the database from a file.", " "}, + { "restoredb", control_restoredb, false, "restore the database from a file.", ""}, }; /* From 748cf6f2745653f9762f8d12c9361255d62f9591 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 14 Aug 2008 08:36:39 +1000 Subject: [PATCH 66/90] store the database name, not the backup filename in the database header (This used to be ctdb commit 0674b33a7492cc1a194833f5ca87d8b30457faee) --- ctdb/tools/ctdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 7b912af9d66..7d8cf846d08 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1887,12 +1887,12 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar dbhdr.timestamp = time(NULL); dbhdr.persistent = dbmap->dbs[i].persistent; dbhdr.size = outdata.dsize; - if (strlen(argv[1]) >= MAX_DB_NAME) { + if (strlen(argv[0]) >= MAX_DB_NAME) { DEBUG(DEBUG_ERR,("Too long dbname\n")); talloc_free(tmp_ctx); return -1; } - strncpy(discard_const(dbhdr.name), argv[1], MAX_DB_NAME); + strncpy(discard_const(dbhdr.name), argv[0], MAX_DB_NAME); write(fh, &dbhdr, sizeof(dbhdr)); write(fh, outdata.dptr, outdata.dsize); From d793154dbb705e46fdc53c44c97aaeac8690025e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 14 Aug 2008 09:52:23 +1000 Subject: [PATCH 67/90] only freeze the local node when doing a backup and not the entire cluster (This used to be ctdb commit ff413beb4bb31e277e843235a1ce5e5ad7b92c71) --- ctdb/tools/ctdb.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 7d8cf846d08..9ebc852b64d 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -1799,12 +1799,10 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar { int i, ret; struct ctdb_dbid_map *dbmap=NULL; - struct ctdb_node_map 
*nodemap=NULL; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); TDB_DATA outdata; struct db_file_header dbhdr; int fh; - uint32_t *nodes; if (argc != 2) { DEBUG(DEBUG_ERR,("Invalid arguments\n")); @@ -1833,21 +1831,10 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar return -1; } - ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, &nodemap); + /* freeze the node */ + ret = ctdb_ctrl_freeze(ctdb, TIMELIMIT(), options.pnn); if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to get nodemap from node %u\n", options.pnn)); - talloc_free(tmp_ctx); - return ret; - } - - /* freeze all nodes */ - nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_FREEZE, - nodes, TIMELIMIT(), - false, tdb_null, - NULL, NULL, - NULL) != 0) { - DEBUG(DEBUG_ERR, ("Unable to freeze nodes.\n")); + DEBUG(DEBUG_ERR, ("Unable to freeze node\n")); ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); talloc_free(tmp_ctx); return -1; @@ -1863,14 +1850,10 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar return -1; } - /* thaw all nodes */ - nodes = list_of_active_nodes(ctdb, nodemap, tmp_ctx, true); - if (ctdb_client_async_control(ctdb, CTDB_CONTROL_THAW, - nodes, TIMELIMIT(), - false, tdb_null, - NULL, NULL, - NULL) != 0) { - DEBUG(DEBUG_ERR, ("Unable to thaw nodes.\n")); + /* thaw the node */ + ret = ctdb_ctrl_thaw(ctdb, TIMELIMIT(), options.pnn); + if (ret != 0) { + DEBUG(DEBUG_ERR, ("Unable to thaw node.\n")); ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); talloc_free(tmp_ctx); return -1; From ed6ca6a84dbc99ffcb7978dd1e98b2b41a2904af Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 14 Aug 2008 10:57:08 +1000 Subject: [PATCH 68/90] use a local tdb_traverse instead of a ctdb_pulldb to lessen the impact of the system while performing a database backup (This used to be ctdb commit 
48fad9c06185a1f2580473cac02b3722e35c2023) --- ctdb/tools/ctdb.c | 89 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 19 deletions(-) diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 9ebc852b64d..eed92d3773d 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -29,6 +29,7 @@ #include "../include/ctdb.h" #include "../include/ctdb_private.h" #include "../common/rb_tree.h" +#include "db_wrap.h" static void usage(void); @@ -1792,6 +1793,40 @@ struct db_file_header { const char name[MAX_DB_NAME]; }; +struct backup_data { + struct ctdb_marshall_buffer *records; + uint32_t len; + uint32_t total; + bool traverse_error; +}; + +static int backup_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *private) +{ + struct backup_data *bd = talloc_get_type(private, struct backup_data); + struct ctdb_rec_data *rec; + + /* add the record */ + rec = ctdb_marshall_record(bd->records, 0, key, NULL, data); + if (rec == NULL) { + bd->traverse_error = true; + DEBUG(DEBUG_ERR,("Failed to marshall record\n")); + return -1; + } + bd->records = talloc_realloc_size(NULL, bd->records, rec->length + bd->len); + if (bd->records == NULL) { + DEBUG(DEBUG_ERR,("Failed to expand marshalling buffer\n")); + bd->traverse_error = true; + return -1; + } + bd->records->count++; + memcpy(bd->len+(uint8_t *)bd->records, rec, rec->length); + bd->len += rec->length; + talloc_free(rec); + + bd->total++; + return 0; +} + /* * backup a database to a file */ @@ -1800,8 +1835,9 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar int i, ret; struct ctdb_dbid_map *dbmap=NULL; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); - TDB_DATA outdata; struct db_file_header dbhdr; + struct ctdb_db_context *ctdb_db; + struct backup_data *bd; int fh; if (argc != 2) { @@ -1831,34 +1867,49 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar return -1; } - /* freeze the node */ - ret = ctdb_ctrl_freeze(ctdb, 
TIMELIMIT(), options.pnn); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to freeze node\n")); - ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + + ctdb_db = ctdb_attach(ctdb, argv[0], dbmap->dbs[i].persistent, 0); + if (ctdb_db == NULL) { + DEBUG(DEBUG_ERR,("Unable to attach to database '%s'\n", argv[0])); + return -1; + } + + + ret = tdb_transaction_start(ctdb_db->ltdb->tdb); + if (ret == -1) { + DEBUG(DEBUG_ERR,("Failed to start transaction\n")); talloc_free(tmp_ctx); return -1; } - ret = ctdb_ctrl_pulldb(ctdb, options.pnn, dbmap->dbs[i].dbid, - CTDB_LMASTER_ANY, tmp_ctx, - TIMELIMIT(), &outdata); - if (ret != 0) { - DEBUG(DEBUG_ERR,(__location__ " Unable to copy db from node %u\n", options.pnn)); - ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + + bd = talloc_zero(tmp_ctx, struct backup_data); + if (bd == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate backup_data\n")); talloc_free(tmp_ctx); return -1; } - /* thaw the node */ - ret = ctdb_ctrl_thaw(ctdb, TIMELIMIT(), options.pnn); - if (ret != 0) { - DEBUG(DEBUG_ERR, ("Unable to thaw node.\n")); - ctdb_ctrl_setrecmode(ctdb, TIMELIMIT(), options.pnn, CTDB_RECOVERY_ACTIVE); + bd->records = talloc_zero(bd, struct ctdb_marshall_buffer); + if (bd->records == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate ctdb_marshall_buffer\n")); talloc_free(tmp_ctx); return -1; } + bd->len = offsetof(struct ctdb_marshall_buffer, data); + bd->records->db_id = ctdb_db->db_id; + /* traverse the database collecting all records */ + if (tdb_traverse_read(ctdb_db->ltdb->tdb, backup_traverse, bd) == -1 || + bd->traverse_error) { + DEBUG(DEBUG_ERR,("Traverse error\n")); + talloc_free(tmp_ctx); + return -1; + } + + tdb_transaction_cancel(ctdb_db->ltdb->tdb); + + fh = open(argv[1], O_RDWR|O_CREAT, 0600); if (fh == -1) { DEBUG(DEBUG_ERR,("Failed to open file '%s'\n", argv[1])); @@ -1869,7 +1920,7 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar 
dbhdr.version = DB_VERSION; dbhdr.timestamp = time(NULL); dbhdr.persistent = dbmap->dbs[i].persistent; - dbhdr.size = outdata.dsize; + dbhdr.size = bd->len; if (strlen(argv[0]) >= MAX_DB_NAME) { DEBUG(DEBUG_ERR,("Too long dbname\n")); talloc_free(tmp_ctx); @@ -1877,7 +1928,7 @@ static int control_backupdb(struct ctdb_context *ctdb, int argc, const char **ar } strncpy(discard_const(dbhdr.name), argv[0], MAX_DB_NAME); write(fh, &dbhdr, sizeof(dbhdr)); - write(fh, outdata.dptr, outdata.dsize); + write(fh, bd->records, bd->len); close(fh); talloc_free(tmp_ctx); From ef997d344f189891ec201a71e8f57bc1ca88992c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 19 Aug 2008 14:58:29 +1000 Subject: [PATCH 69/90] initial ipv6 patch Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 1f131f21386f428bbbbb29098d56c2f64596583b) --- ctdb/client/ctdb_client.c | 6 +- ctdb/common/ctdb_util.c | 111 +++++------ ctdb/common/system_aix.c | 87 +++++---- ctdb/common/system_linux.c | 112 +++++++---- ctdb/include/ctdb_private.h | 53 ++--- ctdb/server/ctdb_control.c | 4 +- ctdb/server/ctdb_daemon.c | 2 +- ctdb/server/ctdb_recover.c | 5 +- ctdb/server/ctdb_recoverd.c | 11 +- ctdb/server/ctdb_takeover.c | 377 +++++++++++++++++++----------------- ctdb/server/ctdbd.c | 6 +- ctdb/tcp/tcp_connect.c | 131 ++++++++----- ctdb/tests/nodes.txt | 8 +- ctdb/tests/nodes6.txt | 11 ++ ctdb/tests/start_daemons.sh | 9 +- ctdb/tools/ctdb.c | 94 ++++----- ctdb/utils/ipmux/ipmux.c | 8 +- 17 files changed, 577 insertions(+), 458 deletions(-) create mode 100644 ctdb/tests/nodes6.txt diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 0d853746aac..dfcd4d90e79 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -2422,15 +2422,15 @@ int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb, int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, - struct sockaddr_in *ip, + ctdb_sock_addr *addr, struct 
ctdb_control_tcp_tickle_list **list) { int ret; TDB_DATA data, outdata; int32_t status; - data.dptr = (uint8_t*)ip; - data.dsize = sizeof(struct sockaddr_in); + data.dptr = (uint8_t*)addr; + data.dsize = sizeof(ctdb_sock_addr); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data, diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index b096a52946b..20238363b60 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -362,40 +362,6 @@ void set_close_on_exec(int fd) } -/* - parse a ip:num pair with the given separator - */ -static bool parse_ip_num(const char *s, struct in_addr *addr, unsigned *num, const char sep) -{ - const char *p; - char *endp = NULL; - char buf[16]; - - p = strchr(s, sep); - if (p == NULL) { - return false; - } - - if (p - s > 15) { - return false; - } - - *num = strtoul(p+1, &endp, 10); - if (endp == NULL || *endp != 0) { - /* trailing garbage */ - return false; - } - - strlcpy(buf, s, 1+p-s); - - if (inet_aton(buf, addr) == 0) { - return false; - } - - return true; -} - - static bool parse_ipv4(const char *s, unsigned port, ctdb_sock_addr *saddr) { saddr->ip.sin_family = AF_INET; @@ -492,31 +458,51 @@ bool parse_ip(const char *addr, ctdb_sock_addr *saddr) /* parse a ip/mask pair */ -bool parse_ip_mask(const char *s, struct sockaddr_in *ip, unsigned *mask) +bool parse_ip_mask(const char *str, ctdb_sock_addr *addr, unsigned *mask) { - ZERO_STRUCT(*ip); + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + char *s, *p; + char *endp = NULL; + bool ret; - if (!parse_ip_num(s, &ip->sin_addr, mask, '/')) { + ZERO_STRUCT(*addr); + s = talloc_strdup(tmp_ctx, str); + if (s == NULL) { + DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n")); + talloc_free(tmp_ctx); return false; } - if (*mask > 32) { + + p = rindex(s, '/'); + if (p == NULL) { + DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a mask\n", s)); + talloc_free(tmp_ctx); return false; } - ip->sin_family = AF_INET; - ip->sin_port = 0; - 
return true; + + *mask = strtoul(p+1, &endp, 10); + if (endp == NULL || *endp != 0) { + /* trailing garbage */ + DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the mask in %s\n", s)); + talloc_free(tmp_ctx); + return false; + } + *p = 0; + + + /* now is this a ipv4 or ipv6 address ?*/ + p = index(s, ':'); + if (p == NULL) { + ret = parse_ipv4(s, 0, addr); + } else { + ret = parse_ipv6(s, 0, addr); + } + + talloc_free(tmp_ctx); + return ret; } -/* - compare two sockaddr_in structures - matching only on IP - */ -bool ctdb_same_ipv4(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2) -{ - return ip1->sin_family == ip2->sin_family && - ip1->sin_addr.s_addr == ip2->sin_addr.s_addr; -} - -bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2) +bool ctdb_same_ip(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2) { if (ip1->sa.sa_family != ip2->sa.sa_family) { return false; @@ -538,13 +524,30 @@ bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2) } /* - compare two sockaddr_in structures + compare two ctdb_sock_addr structures */ -bool ctdb_same_sockaddr(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2) +bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2) { - return ctdb_same_ipv4(ip1, ip2) && ip1->sin_port == ip2->sin_port; + return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port; } +char *ctdb_addr_to_str(ctdb_sock_addr *addr) +{ + static char cip[128] = ""; + + switch (addr->sa.sa_family) { + case AF_INET: + inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip)); + break; + case AF_INET6: + inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip)); + break; + default: + DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family)); + } + + return cip; +} void ctdb_block_signal(int signum) diff --git a/ctdb/common/system_aix.c b/ctdb/common/system_aix.c index b08692d1774..03d997ff6fc 100644 --- a/ctdb/common/system_aix.c +++ 
b/ctdb/common/system_aix.c @@ -194,17 +194,17 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, we try to bind to it, and if that fails then we don't have that IP on an interface */ -bool ctdb_sys_have_ip(struct sockaddr_in ip) +bool ctdb_sys_have_ip(ctdb_sock_addr *addr) { int s; int ret; - ip.sin_port = 0; - s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + addr->sa.sa_port = 0; + s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP); if (s == -1) { return false; } - ret = bind(s, (struct sockaddr *)&ip, sizeof(ip)); + ret = bind(s, (struct sockaddr *)addr, sizeof(ctdb_sock_addr)); close(s); return ret == 0; } @@ -306,7 +306,7 @@ static int aix_get_mac_addr(const char *device_name, uint8_t mac[6]) } int ctdb_sys_read_tcp_packet(int s, void *private_data, - struct sockaddr_in *src, struct sockaddr_in *dst, + ctdb_sock_addr *src, ctdb_sock_addr *dst, uint32_t *ack_seq, uint32_t *seq) { int ret; @@ -326,44 +326,53 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, /* Ethernet */ eth = (struct ether_header *)buffer; - /* We are only interested in IP packets */ - if (eth->ether_type != htons(ETHERTYPE_IP)) { - return -1; - } + /* we want either IPv4 or IPv6 */ + if (eth->ether_type == htons(ETHERTYPE_IP)) { + /* IP */ + ip = (struct ip *)(eth+1); - /* IP */ - ip = (struct ip *)(eth+1); + /* We only want IPv4 packets */ + if (ip->ip_v != 4) { + return -1; + } + /* Dont look at fragments */ + if ((ntohs(ip->ip_off)&0x1fff) != 0) { + return -1; + } + /* we only want TCP */ + if (ip->ip_p != IPPROTO_TCP) { + return -1; + } - /* We only want IPv4 packets */ - if (ip->ip_v != 4) { - return -1; - } - /* Dont look at fragments */ - if ((ntohs(ip->ip_off)&0x1fff) != 0) { - return -1; - } - /* we only want TCP */ - if (ip->ip_p != IPPROTO_TCP) { - return -1; - } - - /* make sure its not a short packet */ - if (offsetof(struct tcphdr, th_ack) + 4 + - (ip->ip_hl*4) > ret) { - return -1; - } - /* TCP */ - tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip); + /* make 
sure its not a short packet */ + if (offsetof(struct tcphdr, th_ack) + 4 + + (ip->ip_hl*4) > ret) { + return -1; + } + /* TCP */ + tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip); - /* tell the caller which one we've found */ - src->sin_addr.s_addr = ip->ip_src.s_addr; - src->sin_port = tcp->th_sport; - dst->sin_addr.s_addr = ip->ip_dst.s_addr; - dst->sin_port = tcp->th_dport; - *ack_seq = tcp->th_ack; - *seq = tcp->th_seq; + /* tell the caller which one we've found */ + src->ip.sin_family = AF_INET; + src->sin_addr.s_addr = ip->ip_src.s_addr; + src->sin_port = tcp->th_sport; + dst->ip.sin_family = AF_INET; + dst->sin_addr.s_addr = ip->ip_dst.s_addr; + dst->sin_port = tcp->th_dport; + *ack_seq = tcp->th_ack; + *seq = tcp->th_seq; - return 0; + + return 0; +#ifndef ETHERTYPE_IP6 +#define ETHERTYPE_IP6 0x86dd +#endif + } else if (eth->ether_type == htons(ETHERTYPE_IP)) { +see system_linux.c for what should go in here + return 0; + } + + return -1; } diff --git a/ctdb/common/system_linux.c b/ctdb/common/system_linux.c index 32db545b09f..760877fe3ad 100644 --- a/ctdb/common/system_linux.c +++ b/ctdb/common/system_linux.c @@ -344,17 +344,17 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, ifname, if non-NULL, will return the name of the interface this ip is tied to */ -bool ctdb_sys_have_ip(struct sockaddr_in ip) +bool ctdb_sys_have_ip(ctdb_sock_addr *addr) { int s; int ret; - ip.sin_port = 0; - s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + addr->ip.sin_port = 0; + s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP); if (s == -1) { return false; } - ret = bind(s, (struct sockaddr *)&ip, sizeof(ip)); + ret = bind(s, (struct sockaddr *)addr, sizeof(ctdb_sock_addr)); close(s); return ret == 0; @@ -395,7 +395,7 @@ int ctdb_sys_close_capture_socket(void *private_data) called when the raw socket becomes readable */ int ctdb_sys_read_tcp_packet(int s, void *private_data, - struct sockaddr_in *src, struct sockaddr_in *dst, + ctdb_sock_addr *src, ctdb_sock_addr 
*dst, uint32_t *ack_seq, uint32_t *seq) { int ret; @@ -403,6 +403,7 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, char pkt[RCVPKTSIZE]; struct ether_header *eth; struct iphdr *ip; + struct ip6_hdr *ip6; struct tcphdr *tcp; ret = recv(s, pkt, RCVPKTSIZE, MSG_TRUNC); @@ -413,45 +414,74 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, /* Ethernet */ eth = (struct ether_header *)pkt; - /* We only want IP packets */ - if (ntohs(eth->ether_type) != ETHERTYPE_IP) { - return -1; - } - - /* IP */ - ip = (struct iphdr *)(eth+1); + /* we want either IPv4 or IPv6 */ + if (ntohs(eth->ether_type) == ETHERTYPE_IP) { + /* IP */ + ip = (struct iphdr *)(eth+1); - /* We only want IPv4 packets */ - if (ip->version != 4) { - return -1; - } - /* Dont look at fragments */ - if ((ntohs(ip->frag_off)&0x1fff) != 0) { - return -1; - } - /* we only want TCP */ - if (ip->protocol != IPPROTO_TCP) { - return -1; + /* We only want IPv4 packets */ + if (ip->version != 4) { + return -1; + } + /* Dont look at fragments */ + if ((ntohs(ip->frag_off)&0x1fff) != 0) { + return -1; + } + /* we only want TCP */ + if (ip->protocol != IPPROTO_TCP) { + return -1; + } + + /* make sure its not a short packet */ + if (offsetof(struct tcphdr, ack_seq) + 4 + + (ip->ihl*4) + sizeof(*eth) > ret) { + return -1; + } + /* TCP */ + tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip); + + /* tell the caller which one we've found */ + src->ip.sin_family = AF_INET; + src->ip.sin_addr.s_addr = ip->saddr; + src->ip.sin_port = tcp->source; + dst->ip.sin_family = AF_INET; + dst->ip.sin_addr.s_addr = ip->daddr; + dst->ip.sin_port = tcp->dest; + *ack_seq = tcp->ack_seq; + *seq = tcp->seq; + + return 0; +#ifndef ETHERTYPE_IP6 +#define ETHERTYPE_IP6 0x86dd +#endif + } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) { + /* IP6 */ + ip6 = (struct ip6_hdr *)(eth+1); + + /* we only want TCP */ + if (ip6->ip6_nxt != IPPROTO_TCP) { + return -1; + } + + /* TCP */ + tcp = (struct tcphdr *)(ip6+1); + + /* tell the 
caller which one we've found */ + src->ip6.sin6_family = AF_INET6; + src->ip6.sin6_port = tcp->source; + src->ip6.sin6_addr = ip6->ip6_src; + + dst->ip6.sin6_family = AF_INET6; + dst->ip6.sin6_port = tcp->dest; + dst->ip6.sin6_addr = ip6->ip6_dst; + + *ack_seq = tcp->ack_seq; + *seq = tcp->seq; + + return 0; } - /* make sure its not a short packet */ - if (offsetof(struct tcphdr, ack_seq) + 4 + - (ip->ihl*4) + sizeof(*eth) > ret) { - return -1; - } - - /* TCP */ - tcp = (struct tcphdr *)((ip->ihl*4) + (char *)ip); - - /* tell the caller which one we've found */ - src->sin_addr.s_addr = ip->saddr; - src->sin_port = tcp->source; - dst->sin_addr.s_addr = ip->daddr; - dst->sin_port = tcp->dest; - *ack_seq = tcp->ack_seq; - *seq = tcp->seq; - - return 0; + return -1; } diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index f44a940a84e..f73f9ef963f 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -61,8 +61,8 @@ typedef union { a tcp connection description */ struct ctdb_tcp_connection { - struct sockaddr_in saddr; - struct sockaddr_in daddr; + ctdb_sock_addr src_addr; + ctdb_sock_addr dst_addr; }; /* the wire representation for a tcp tickle array */ @@ -73,7 +73,7 @@ struct ctdb_tcp_wire_array { /* the list of tcp tickles used by get/set tcp tickle list */ struct ctdb_control_tcp_tickle_list { - struct sockaddr_in ip; + ctdb_sock_addr addr; struct ctdb_tcp_wire_array tickles; }; @@ -170,7 +170,7 @@ struct ctdb_vnn { struct ctdb_vnn *prev, *next; const char *iface; - struct sockaddr_in public_address; + ctdb_sock_addr public_address; uint8_t public_netmask_bits; /* the node number that is serving this public address, if any. 
@@ -563,26 +563,27 @@ struct ctdb_control_set_call { /* struct for tcp_client control + used by samba can not modify */ -struct ctdb_control_tcp { - struct sockaddr_in src; - struct sockaddr_in dest; +struct ctdb_tcp_client { + struct sockaddr_in src; // samba uses this + struct sockaddr_in dest;// samba uses this }; /* struct for kill_tcp control */ struct ctdb_control_killtcp { - struct sockaddr_in src; - struct sockaddr_in dst; + ctdb_sock_addr src_addr; + ctdb_sock_addr dst_addr; }; /* - struct holding a sockaddr_in and an interface name, + struct holding a ctdb_sock_addr and an interface name, used to add/remove public addresses */ struct ctdb_control_ip_iface { - struct sockaddr_in sin; + ctdb_sock_addr addr; uint32_t mask; uint32_t len; char iface[1]; @@ -603,8 +604,8 @@ struct ctdb_control_gratious_arp { struct for tcp_add and tcp_remove controls */ struct ctdb_control_tcp_vnn { - struct sockaddr_in src; - struct sockaddr_in dest; + ctdb_sock_addr src; + ctdb_sock_addr dest; }; /* @@ -804,13 +805,11 @@ enum ctdb_trans2_commit_error { void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) 
PRINTF_ATTRIBUTE(2,3); void ctdb_fatal(struct ctdb_context *ctdb, const char *msg); bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2); -bool parse_ip_mask(const char *s, struct sockaddr_in *ip, unsigned *mask); int ctdb_parse_address(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, const char *str, struct ctdb_address *address); -bool ctdb_same_ipv4(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2); -bool ctdb_same_ip(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2); -bool ctdb_same_sockaddr(const struct sockaddr_in *ip1, const struct sockaddr_in *ip2); +bool ctdb_same_ip(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2); +bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2); uint32_t ctdb_hash(const TDB_DATA *key); uint32_t ctdb_hash_string(const char *str); void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr); @@ -1085,8 +1084,7 @@ struct ctdb_control_list_tunable { struct ctdb_node_and_flags { uint32_t pnn; uint32_t flags; - struct sockaddr_in sin; - + ctdb_sock_addr addr; }; struct ctdb_node_map { @@ -1191,7 +1189,7 @@ int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb, struct ctdb_public_ip { uint32_t pnn; - struct sockaddr_in sin; + ctdb_sock_addr addr; }; int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_public_ip *ip); @@ -1210,7 +1208,7 @@ int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb, /* from takeover/system.c */ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface); -bool ctdb_sys_have_ip(struct sockaddr_in ip); +bool ctdb_sys_have_ip(ctdb_sock_addr *addr); int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, const ctdb_sock_addr *src, uint32_t seq, uint32_t ack, int rst); @@ -1266,13 +1264,14 @@ int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb, void ctdb_start_freeze(struct ctdb_context *ctdb); -bool parse_ip_port(const char *s, ctdb_sock_addr *saddr); -bool parse_ip(const char 
*s, ctdb_sock_addr *saddr); +bool parse_ip_mask(const char *s, ctdb_sock_addr *addr, unsigned *mask); +bool parse_ip_port(const char *s, ctdb_sock_addr *addr); +bool parse_ip(const char *s, ctdb_sock_addr *addr); + int ctdb_sys_open_capture_socket(const char *iface, void **private_data); int ctdb_sys_close_capture_socket(void *private_data); -int ctdb_sys_read_tcp_packet(int s, void *private_data, struct sockaddr_in *src, struct sockaddr_in *dst, - uint32_t *ack_seq, uint32_t *seq); +int ctdb_sys_read_tcp_packet(int s, void *private_data, ctdb_sock_addr *src, ctdb_sock_addr *dst, uint32_t *ack_seq, uint32_t *seq); int ctdb_ctrl_killtcp(struct ctdb_context *ctdb, struct timeval timeout, @@ -1299,7 +1298,7 @@ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, - struct sockaddr_in *ip, + ctdb_sock_addr *addr, struct ctdb_control_tcp_tickle_list **list); @@ -1376,4 +1375,6 @@ int32_t ctdb_control_trans2_finished(struct ctdb_context *ctdb, int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, struct ctdb_req_control *c); +char *ctdb_addr_to_str(ctdb_sock_addr *addr); + #endif diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index edfe344fdb9..68635d6b439 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -296,7 +296,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_get_public_ips(ctdb, c, outdata); case CTDB_CONTROL_TCP_CLIENT: - CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp)); + CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_tcp_client)); return ctdb_control_tcp_client(ctdb, client_id, indata); case CTDB_CONTROL_STARTUP: @@ -325,7 +325,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_kill_tcp(ctdb, indata); case CTDB_CONTROL_GET_TCP_TICKLE_LIST: - CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr_in)); + CHECK_CONTROL_DATA_SIZE(sizeof(ctdb_sock_addr)); return 
ctdb_control_get_tcp_tickle_list(ctdb, indata, outdata); case CTDB_CONTROL_SET_TCP_TICKLE_LIST: diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index 3978e28fdf4..efe3d753498 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -535,7 +535,7 @@ static void ctdb_daemon_read_cb(uint8_t *data, size_t cnt, void *args) static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private_data) { - struct sockaddr_in addr; + struct sockaddr_un addr; socklen_t len; int fd; struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index f0b97acdeed..3243f42faa7 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -163,7 +163,10 @@ ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA ind node_map = (struct ctdb_node_map *)outdata->dptr; node_map->num = num_nodes; for (i=0; inodes[i]->address.address, &node_map->nodes[i].sin.sin_addr); + if (parse_ip(ctdb->nodes[i]->address.address, &node_map->nodes[i].addr) == 0) { + DEBUG(DEBUG_ERR, (__location__ " Failed to parse %s into a sockaddr\n", ctdb->nodes[i]->address.address)); + } + node_map->nodes[i].pnn = ctdb->nodes[i]->pnn; node_map->nodes[i].flags = ctdb->nodes[i]->flags; } diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c503aa5489a..c6a4ab322a3 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2207,8 +2207,9 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn) */ for (j=0; jnum; j++) { if (ips->ips[j].pnn == pnn) { - if (!ctdb_sys_have_ip(ips->ips[j].sin)) { - DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", inet_ntoa(ips->ips[j].sin.sin_addr))); + if (!ctdb_sys_have_ip(&ips->ips[j].addr)) { + DEBUG(DEBUG_CRIT,("Public address '%s' is missing and we should serve this ip\n", + 
ctdb_addr_to_str(&ips->ips[j].addr))); ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE); if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n")); @@ -2225,8 +2226,10 @@ static int verify_ip_allocation(struct ctdb_context *ctdb, uint32_t pnn) } } } else { - if (ctdb_sys_have_ip(ips->ips[j].sin)) { - DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", inet_ntoa(ips->ips[j].sin.sin_addr))); + if (ctdb_sys_have_ip(&ips->ips[j].addr)) { + DEBUG(DEBUG_CRIT,("We are still serving a public address '%s' that we should not be serving.\n", + ctdb_addr_to_str(&ips->ips[j].addr))); + ret = ctdb_ctrl_freeze(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE); if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to freeze node due to public ip address mismatches\n")); diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 53f48c175a1..54581909961 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -56,7 +56,7 @@ struct ctdb_tcp_list { struct ctdb_client_ip { struct ctdb_client_ip *prev, *next; struct ctdb_context *ctdb; - struct sockaddr_in ip; + ctdb_sock_addr addr; uint32_t client_id; }; @@ -72,7 +72,6 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * int i, ret; struct ctdb_tcp_array *tcparray; - ret = ctdb_sys_send_arp(&arp->addr, arp->vnn->iface); if (ret != 0) { DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed (%s)\n", strerror(errno))); @@ -81,17 +80,20 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * tcparray = arp->tcparray; if (tcparray) { for (i=0;inum;i++) { + struct ctdb_tcp_connection *tcon; + + tcon = &tcparray->connections[i]; DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n", - (unsigned)ntohs(tcparray->connections[i].daddr.sin_port), - inet_ntoa(tcparray->connections[i].saddr.sin_addr), - 
(unsigned)ntohs(tcparray->connections[i].saddr.sin_port))); + (unsigned)ntohs(tcon->dst_addr.ip.sin_port), + ctdb_addr_to_str(&tcon->src_addr), + (unsigned)ntohs(tcon->src_addr.ip.sin_port))); ret = ctdb_sys_send_tcp( - (ctdb_sock_addr *)&tcparray->connections[i].saddr, - (ctdb_sock_addr *)&tcparray->connections[i].daddr, + &tcon->src_addr, + &tcon->dst_addr, 0, 0, 0); if (ret != 0) { DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n", - inet_ntoa(tcparray->connections[i].saddr.sin_addr))); + ctdb_addr_to_str(&tcon->src_addr))); } } } @@ -126,14 +128,9 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status, struct ctdb_tcp_array *tcparray; if (status != 0) { - char ip[128] = ""; - - if (inet_ntop(state->addr->sa.sa_family, &state->addr->sa.sa_data[0], ip, sizeof(ip)) == NULL) { - DEBUG(DEBUG_ERR, (__location__ " inet_ntop() failed\n")); - } - DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n", - ip, state->vnn->iface)); + ctdb_addr_to_str(state->addr), + state->vnn->iface)); ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); talloc_free(state); return; @@ -181,12 +178,12 @@ failed: Find the vnn of the node that has a public ip address returns -1 if the address is not known as a public address */ -static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, struct sockaddr_in ip) +static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr) { struct ctdb_vnn *vnn; for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (ctdb_same_ipv4(&vnn->public_address, &ip)) { + if (ctdb_same_ip(&vnn->public_address, addr)) { return vnn; } } @@ -209,16 +206,16 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn; /* update out vnn list */ - vnn = find_public_ip_vnn(ctdb, pip->sin); + vnn = find_public_ip_vnn(ctdb, &pip->addr); if (vnn == NULL) { DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", - 
inet_ntoa(pip->sin.sin_addr))); + ctdb_addr_to_str(&pip->addr))); return 0; } vnn->pnn = pip->pnn; /* if our kernel already has this IP, do nothing */ - if (ctdb_sys_have_ip(pip->sin)) { + if (ctdb_sys_have_ip(&pip->addr)) { return 0; } @@ -229,24 +226,26 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, state->addr = talloc(ctdb, ctdb_sock_addr); CTDB_NO_MEMORY(ctdb, state->addr); - state->addr->ip = pip->sin; //qqq pip must be converted - state->vnn = vnn; + *state->addr = pip->addr; + state->vnn = vnn; DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n", - inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits, - vnn->iface)); + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + vnn->iface)); ret = ctdb_event_script_callback(ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), state, takeover_ip_callback, state, "takeip %s %s %u", vnn->iface, - inet_ntoa(pip->sin.sin_addr), + talloc_strdup(state, ctdb_addr_to_str(&pip->addr)), vnn->public_netmask_bits); if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n", - inet_ntoa(pip->sin.sin_addr), vnn->iface)); + ctdb_addr_to_str(&pip->addr), + vnn->iface)); talloc_free(state); return -1; } @@ -263,27 +262,32 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr) { struct ctdb_client_ip *ip; - char cip[128] = ""; - DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n", inet_ntop(addr->sa.sa_family, &addr->sa.sa_data[0], cip, sizeof(cip)))); + DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n", + ctdb_addr_to_str(addr))); for (ip=ctdb->client_ip_list; ip; ip=ip->next) { ctdb_sock_addr tmp_addr; - tmp_addr.ip = ip->ip; //qqq until ip->ip is no longer a sockaddr_in + tmp_addr = ip->addr; DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", - ip->client_id, inet_ntoa(ip->ip.sin_addr))); + ip->client_id, + ctdb_addr_to_str(&ip->addr))); + if 
(ctdb_same_ip(&tmp_addr, addr)) { struct ctdb_client *client = ctdb_reqid_find(ctdb, ip->client_id, struct ctdb_client); DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", - ip->client_id, inet_ntoa(ip->ip.sin_addr), client->pid)); + ip->client_id, + ctdb_addr_to_str(&ip->addr), + client->pid)); + if (client->pid != 0) { DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n", - (unsigned)client->pid, - inet_ntop(addr->sa.sa_family, &addr->sa.sa_data[0], cip, sizeof(cip)), - ip->client_id)); + (unsigned)client->pid, + ctdb_addr_to_str(addr), + ip->client_id)); kill(client->pid, SIGKILL); } } @@ -298,21 +302,13 @@ static void release_ip_callback(struct ctdb_context *ctdb, int status, { struct takeover_callback_state *state = talloc_get_type(private_data, struct takeover_callback_state); - char ip[128] = ""; TDB_DATA data; /* send a message to all clients of this node telling them that the cluster has been reconfigured and they should release any sockets on this IP */ -#if 1 - strncpy(ip, inet_ntoa(state->addr->ip.sin_addr), sizeof(ip)-1); -#else - if (inet_ntop(state->addr->sa.sa_family, &state->addr->sa.sa_data[0], ip, sizeof(ip)) == NULL) { - DEBUG(DEBUG_ERR, (__location__ " inet_ntop() failed\n")); - } -#endif - data.dptr = (uint8_t *)ip; - data.dsize = strlen(ip)+1; + data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr)); + data.dsize = strlen((char *)data.dptr)+1; DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr)); @@ -340,10 +336,10 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, struct ctdb_vnn *vnn; /* update our vnn list */ - vnn = find_public_ip_vnn(ctdb, pip->sin); + vnn = find_public_ip_vnn(ctdb, &pip->addr); if (vnn == NULL) { - DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n", - inet_ntoa(pip->sin.sin_addr))); + DEBUG(DEBUG_ERR,("takeoverip called for an ip '%s' that is not a public address\n", + 
ctdb_addr_to_str(&pip->addr))); return 0; } vnn->pnn = pip->pnn; @@ -352,16 +348,18 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, talloc_free(vnn->takeover_ctx); vnn->takeover_ctx = NULL; - if (!ctdb_sys_have_ip(pip->sin)) { + if (!ctdb_sys_have_ip(&pip->addr)) { DEBUG(DEBUG_INFO,("Redundant release of IP %s/%u on interface %s (ip not held)\n", - inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits, - vnn->iface)); + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + vnn->iface)); return 0; } DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s\n", - inet_ntoa(pip->sin.sin_addr), vnn->public_netmask_bits, - vnn->iface)); + ctdb_addr_to_str(&pip->addr), + vnn->public_netmask_bits, + vnn->iface)); state = talloc(ctdb, struct takeover_callback_state); CTDB_NO_MEMORY(ctdb, state); @@ -369,20 +367,20 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, state->c = talloc_steal(state, c); state->addr = talloc(state, ctdb_sock_addr); CTDB_NO_MEMORY(ctdb, state->addr); - state->addr->ip = pip->sin; //qqq pip must be converted - - state->vnn = vnn; + *state->addr = pip->addr; + state->vnn = vnn; ret = ctdb_event_script_callback(ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), state, release_ip_callback, state, "releaseip %s %s %u", vnn->iface, - inet_ntoa(pip->sin.sin_addr), + talloc_strdup(state, ctdb_addr_to_str(&pip->addr)), vnn->public_netmask_bits); if (ret != 0) { DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n", - inet_ntoa(pip->sin.sin_addr), vnn->iface)); + ctdb_addr_to_str(&pip->addr), + vnn->iface)); talloc_free(state); return -1; } @@ -394,15 +392,15 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, -static int ctdb_add_public_address(struct ctdb_context *ctdb, struct sockaddr_in addr, unsigned mask, const char *iface) +static int ctdb_add_public_address(struct ctdb_context *ctdb, ctdb_sock_addr *addr, unsigned mask, const char *iface) { struct ctdb_vnn *vnn; /* Verify that 
we dont have an entry for this ip yet */ for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (ctdb_same_sockaddr(&addr, &vnn->public_address)) { + if (ctdb_same_sockaddr(addr, &vnn->public_address)) { DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", - inet_ntoa(addr.sin_addr))); + ctdb_addr_to_str(addr))); return -1; } } @@ -411,7 +409,7 @@ static int ctdb_add_public_address(struct ctdb_context *ctdb, struct sockaddr_in vnn = talloc_zero(ctdb, struct ctdb_vnn); CTDB_NO_MEMORY_FATAL(ctdb, vnn); vnn->iface = talloc_strdup(vnn, iface); - vnn->public_address = addr; + vnn->public_address = *addr; vnn->public_netmask_bits = mask; vnn->pnn = -1; @@ -451,7 +449,7 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist) for (i=0;inum;i++) { - if (ip->sin.sin_addr.s_addr == public_ips->ips[i].sin.sin_addr.s_addr) { + if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) { /* yes, this node can serve this public ip */ return 0; } @@ -579,7 +577,9 @@ static int find_takeover_node(struct ctdb_context *ctdb, } } if (pnn == -1) { - DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n", inet_ntoa(ip->sin.sin_addr))); + DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n", + ctdb_addr_to_str(&ip->addr))); + return -1; } @@ -598,8 +598,8 @@ add_ip_to_merged_list(struct ctdb_context *ctdb, /* do we already have this ip in our merged list ?*/ for (tmp_ip=ip_list;tmp_ip;tmp_ip=tmp_ip->next) { - /* we already have this public ip in the list */ - if (tmp_ip->sin.sin_addr.s_addr == ip->sin.sin_addr.s_addr) { + /* we already have this public ip in the list */ + if (ctdb_same_ip(&tmp_ip->addr, &ip->addr)) { return ip_list; } } @@ -608,7 +608,7 @@ add_ip_to_merged_list(struct ctdb_context *ctdb, tmp_ip = talloc_zero(tmp_ctx, struct ctdb_public_ip_list); CTDB_NO_MEMORY_NULL(ctdb, tmp_ip); tmp_ip->pnn = ip->pnn; - tmp_ip->sin = ip->sin; + tmp_ip->addr = 
ip->addr; tmp_ip->next = ip_list; return tmp_ip; @@ -734,7 +734,8 @@ try_again: for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) { if (tmp_ip->pnn == -1) { if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) { - DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n", inet_ntoa(tmp_ip->sin.sin_addr))); + DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n", + ctdb_addr_to_str(&tmp_ip->addr))); } } } @@ -801,7 +802,9 @@ try_again: } } if (maxnode == -1) { - DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", inet_ntoa(tmp_ip->sin.sin_addr))); + DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", + ctdb_addr_to_str(&tmp_ip->addr))); + continue; } @@ -864,9 +867,8 @@ finished: */ continue; } - ip.pnn = tmp_ip->pnn; - ip.sin.sin_family = AF_INET; - ip.sin.sin_addr = tmp_ip->sin.sin_addr; + ip.pnn = tmp_ip->pnn; + ip.addr = tmp_ip->addr; timeout = TAKEOVER_TIMEOUT(); data.dsize = sizeof(ip); @@ -900,9 +902,8 @@ finished: /* this IP won't be taken over */ continue; } - ip.pnn = tmp_ip->pnn; - ip.sin.sin_family = AF_INET; - ip.sin.sin_addr = tmp_ip->sin.sin_addr; + ip.pnn = tmp_ip->pnn; + ip.addr = tmp_ip->addr; timeout = TAKEOVER_TIMEOUT(); data.dsize = sizeof(ip); @@ -936,7 +937,10 @@ finished: static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip) { DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n", - inet_ntoa(ip->ip.sin_addr), ntohs(ip->ip.sin_port), ip->client_id)); + ctdb_addr_to_str(&ip->addr), + ntohs(ip->addr.ip.sin_port), + ip->client_id)); + DLIST_REMOVE(ip->ctdb->client_ip_list, ip); return 0; } @@ -945,31 +949,36 @@ static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip) called by a client to inform us of a TCP connection that it is managing that should tickled with an ACK when IP takeover is done */ +//qqq we need a new version of this control that takes ctdb_sock_addr +//and have samba move to that instead. 
+// This is IPV4 ONLY int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata) { struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); - struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr; + struct ctdb_tcp_client *p = (struct ctdb_tcp_client *)indata.dptr; struct ctdb_tcp_list *tcp; struct ctdb_control_tcp_vnn t; int ret; TDB_DATA data; struct ctdb_client_ip *ip; struct ctdb_vnn *vnn; + ctdb_sock_addr addr; - vnn = find_public_ip_vnn(ctdb, p->dest); + addr.ip = p->dest; + vnn = find_public_ip_vnn(ctdb, &addr); if (vnn == NULL) { if (ntohl(p->dest.sin_addr.s_addr) != INADDR_LOOPBACK) { DEBUG(DEBUG_INFO,("Could not add client IP %s. This is not a public address.\n", - inet_ntoa(p->dest.sin_addr))); + ctdb_addr_to_str((ctdb_sock_addr *)&p->dest))); } return 0; } if (vnn->pnn != ctdb->pnn) { DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n", - inet_ntoa(p->dest.sin_addr), - client_id, client->pid)); + ctdb_addr_to_str((ctdb_sock_addr *)&p->dest), + client_id, client->pid)); /* failing this call will tell smbd to die */ return -1; } @@ -977,8 +986,8 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, ip = talloc(client, struct ctdb_client_ip); CTDB_NO_MEMORY(ctdb, ip); - ip->ctdb = ctdb; - ip->ip = p->dest; + ip->ctdb = ctdb; + ip->addr.ip = p->dest; ip->client_id = client_id; talloc_set_destructor(ip, ctdb_client_ip_destructor); DLIST_ADD(ctdb->client_ip_list, ip); @@ -986,21 +995,21 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, tcp = talloc(client, struct ctdb_tcp_list); CTDB_NO_MEMORY(ctdb, tcp); - tcp->connection.saddr = p->src; - tcp->connection.daddr = p->dest; + tcp->connection.src_addr.ip = p->src; + tcp->connection.dst_addr.ip = p->dest; DLIST_ADD(client->tcp_list, tcp); - t.src = p->src; - t.dest = p->dest; + t.src.ip = p->src; + t.dest.ip = p->dest; 
data.dptr = (uint8_t *)&t; data.dsize = sizeof(t); DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n", - (unsigned)ntohs(p->dest.sin_port), - inet_ntoa(p->src.sin_addr), - (unsigned)ntohs(p->src.sin_port), client_id, client->pid)); + (unsigned)ntohs(p->dest.sin_port), + ctdb_addr_to_str((ctdb_sock_addr *)&p->src), + (unsigned)ntohs(p->src.sin_port), client_id, client->pid)); /* tell all nodes about this tcp connection */ ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, @@ -1014,16 +1023,6 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, return 0; } -/* - see if two sockaddr_in are the same - */ -static bool same_sockaddr_in(struct sockaddr_in *in1, struct sockaddr_in *in2) -{ - return in1->sin_family == in2->sin_family && - in1->sin_port == in2->sin_port && - in1->sin_addr.s_addr == in2->sin_addr.s_addr; -} - /* find a tcp address on a list */ @@ -1037,8 +1036,8 @@ static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, } for (i=0;inum;i++) { - if (same_sockaddr_in(&array->connections[i].saddr, &tcp->saddr) && - same_sockaddr_in(&array->connections[i].daddr, &tcp->daddr)) { + if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) && + ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) { return &array->connections[i]; } } @@ -1057,10 +1056,11 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) struct ctdb_tcp_connection tcp; struct ctdb_vnn *vnn; - vnn = find_public_ip_vnn(ctdb, p->dest); + vnn = find_public_ip_vnn(ctdb, &p->dest); if (vnn == NULL) { - DEBUG(DEBUG_ERR,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n", - inet_ntoa(p->dest.sin_addr))); + DEBUG(DEBUG_ERR,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n", + ctdb_addr_to_str(&p->dest))); + return -1; } @@ -1079,21 +1079,21 @@ int32_t ctdb_control_tcp_add(struct 
ctdb_context *ctdb, TDB_DATA indata) tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection)); CTDB_NO_MEMORY(ctdb, tcparray->connections); - tcparray->connections[tcparray->num].saddr = p->src; - tcparray->connections[tcparray->num].daddr = p->dest; + tcparray->connections[tcparray->num].src_addr = p->src; + tcparray->connections[tcparray->num].dst_addr = p->dest; tcparray->num++; return 0; } /* Do we already have this tickle ?*/ - tcp.saddr = p->src; - tcp.daddr = p->dest; + tcp.src_addr = p->src; + tcp.dst_addr = p->dest; if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) { DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n", - inet_ntoa(tcp.daddr.sin_addr), - ntohs(tcp.daddr.sin_port), - vnn->pnn)); + ctdb_addr_to_str(&tcp.dst_addr), + ntohs(tcp.dst_addr.ip.sin_port), + vnn->pnn)); return 0; } @@ -1104,14 +1104,14 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) CTDB_NO_MEMORY(ctdb, tcparray->connections); vnn->tcp_array = tcparray; - tcparray->connections[tcparray->num].saddr = p->src; - tcparray->connections[tcparray->num].daddr = p->dest; + tcparray->connections[tcparray->num].src_addr = p->src; + tcparray->connections[tcparray->num].dst_addr = p->dest; tcparray->num++; DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n", - inet_ntoa(tcp.daddr.sin_addr), - ntohs(tcp.daddr.sin_port), - vnn->pnn)); + ctdb_addr_to_str(&tcp.dst_addr), + ntohs(tcp.dst_addr.ip.sin_port), + vnn->pnn)); return 0; } @@ -1125,10 +1125,11 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn) { struct ctdb_tcp_connection *tcpp; - struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, conn->daddr); + struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr); if (vnn == NULL) { - DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n", inet_ntoa(conn->daddr.sin_addr))); + 
DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n", + ctdb_addr_to_str(&conn->dst_addr))); return; } @@ -1137,8 +1138,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc */ if (vnn->tcp_array == NULL) { DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n", - inet_ntoa(conn->daddr.sin_addr), - ntohs(conn->daddr.sin_port))); + ctdb_addr_to_str(&conn->dst_addr), + ntohs(conn->dst_addr.ip.sin_port))); return; } @@ -1149,8 +1150,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc tcpp = ctdb_tcp_find(vnn->tcp_array, conn); if (tcpp == NULL) { DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n", - inet_ntoa(conn->daddr.sin_addr), - ntohs(conn->daddr.sin_port))); + ctdb_addr_to_str(&conn->dst_addr), + ntohs(conn->dst_addr.ip.sin_port))); return; } @@ -1174,8 +1175,8 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc vnn->tcp_update_needed = true; DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n", - inet_ntoa(conn->saddr.sin_addr), - ntohs(conn->saddr.sin_port))); + ctdb_addr_to_str(&conn->src_addr), + ntohs(conn->src_addr.ip.sin_port))); } @@ -1212,15 +1213,14 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) struct ctdb_vnn *vnn; for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (!ctdb_sys_have_ip(vnn->public_address)) { + if (!ctdb_sys_have_ip(&vnn->public_address)) { continue; } ctdb_event_script(ctdb, "releaseip %s %s %u", vnn->iface, - inet_ntoa(vnn->public_address.sin_addr), + talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)), vnn->public_netmask_bits); -// convert when vnn->public_address is no longer a sockaddr_in - release_kill_clients(ctdb, (ctdb_sock_addr *)&vnn->public_address); + release_kill_clients(ctdb, &vnn->public_address); } } @@ -1252,8 +1252,8 @@ int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, ips->num = num; i = 0; for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - 
ips->ips[i].pnn = vnn->pnn; - ips->ips[i].sin = vnn->public_address; + ips->ips[i].pnn = vnn->pnn; + ips->ips[i].addr = vnn->public_address; i++; } @@ -1279,8 +1279,8 @@ struct ctdb_kill_tcp { a tcp connection that is to be killed */ struct ctdb_killtcp_con { - struct sockaddr_in src; - struct sockaddr_in dst; + ctdb_sock_addr src_addr; + ctdb_sock_addr dst_addr; int count; struct ctdb_kill_tcp *killtcp; }; @@ -1290,15 +1290,41 @@ struct ctdb_killtcp_con { this key is used to insert and lookup matching socketpairs that are to be tickled and RST */ -#define KILLTCP_KEYLEN 4 -static uint32_t *killtcp_key(struct sockaddr_in *src, struct sockaddr_in *dst) +#define KILLTCP_KEYLEN 10 +static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst) { static uint32_t key[KILLTCP_KEYLEN]; - key[0] = dst->sin_addr.s_addr; - key[1] = src->sin_addr.s_addr; - key[2] = dst->sin_port; - key[3] = src->sin_port; + bzero(key, sizeof(key)); + + if (src->sa.sa_family != dst->sa.sa_family) { + DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family)); + return key; + } + + switch (src->sa.sa_family) { + case AF_INET: + key[0] = dst->ip.sin_addr.s_addr; + key[1] = src->ip.sin_addr.s_addr; + key[2] = dst->ip.sin_port; + key[3] = src->ip.sin_port; + break; + case AF_INET6: + key[0] = dst->ip6.sin6_addr.s6_addr32[3]; + key[1] = src->ip6.sin6_addr.s6_addr32[3]; + key[2] = dst->ip6.sin6_addr.s6_addr32[2]; + key[3] = src->ip6.sin6_addr.s6_addr32[2]; + key[4] = dst->ip6.sin6_addr.s6_addr32[1]; + key[5] = src->ip6.sin6_addr.s6_addr32[1]; + key[6] = dst->ip6.sin6_addr.s6_addr32[0]; + key[7] = src->ip6.sin6_addr.s6_addr32[0]; + key[8] = dst->ip6.sin6_port; + key[9] = src->ip6.sin6_port; + break; + default: + DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family)); + return key; + } return key; } @@ -1311,7 +1337,7 @@ static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
{ struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp); struct ctdb_killtcp_con *con; - struct sockaddr_in src, dst; + ctdb_sock_addr src, dst; uint32_t ack_seq, seq; if (!(flags & EVENT_FD_READ)) { @@ -1339,12 +1365,12 @@ static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, /* This one has been tickled ! now reset him and remove him from the list. */ - DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n", ntohs(con->dst.sin_port), inet_ntoa(con->src.sin_addr), ntohs(con->src.sin_port))); + DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n", + ntohs(con->dst_addr.ip.sin_port), + ctdb_addr_to_str(&con->src_addr), + ntohs(con->src_addr.ip.sin_port))); - ctdb_sys_send_tcp( - (ctdb_sock_addr *)&con->dst, - (ctdb_sock_addr *)&con->src, - ack_seq, seq, 1); + ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1); talloc_free(con); } @@ -1367,8 +1393,8 @@ static void tickle_connection_traverse(void *param, void *data) /* othervise, try tickling it again */ con->count++; ctdb_sys_send_tcp( - (ctdb_sock_addr *)&con->dst, - (ctdb_sock_addr *)&con->src, + (ctdb_sock_addr *)&con->dst_addr, + (ctdb_sock_addr *)&con->src_addr, 0, 0, 0); } @@ -1427,20 +1453,21 @@ static void *add_killtcp_callback(void *parm, void *data) add a tcp socket to the list of connections we want to RST */ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, - struct sockaddr_in *src, struct sockaddr_in *dst) + ctdb_sock_addr *src, + ctdb_sock_addr *dst) { struct ctdb_kill_tcp *killtcp; struct ctdb_killtcp_con *con; struct ctdb_vnn *vnn; - vnn = find_public_ip_vnn(ctdb, *dst); + vnn = find_public_ip_vnn(ctdb, dst); if (vnn == NULL) { - vnn = find_public_ip_vnn(ctdb, *src); + vnn = find_public_ip_vnn(ctdb, src); } if (vnn == NULL) { /* if it is not a public ip it could be our 'single ip' */ if (ctdb->single_ip_vnn) { - if (ctdb_same_ipv4(&ctdb->single_ip_vnn->public_address, 
dst)) { + if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, dst)) { vnn = ctdb->single_ip_vnn; } } @@ -1475,14 +1502,14 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, */ con = talloc(killtcp, struct ctdb_killtcp_con); CTDB_NO_MEMORY(ctdb, con); - con->src = *src; - con->dst = *dst; - con->count = 0; - con->killtcp = killtcp; + con->src_addr = *src; + con->dst_addr = *dst; + con->count = 0; + con->killtcp = killtcp; trbt_insertarray32_callback(killtcp->connections, - KILLTCP_KEYLEN, killtcp_key(&con->dst, &con->src), + KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr), add_killtcp_callback, con); /* @@ -1511,8 +1538,8 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, /* tickle him once now */ ctdb_sys_send_tcp( - (ctdb_sock_addr *)&con->dst, - (ctdb_sock_addr *)&con->src, + &con->dst_addr, + &con->src_addr, 0, 0, 0); return 0; @@ -1530,7 +1557,7 @@ int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata) { struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr; - return ctdb_killtcp_add_connection(ctdb, &killtcp->src, &killtcp->dst); + return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr); } /* @@ -1563,10 +1590,11 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind return -1; } - vnn = find_public_ip_vnn(ctdb, list->ip); + vnn = find_public_ip_vnn(ctdb, &list->addr); if (vnn == NULL) { DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", - inet_ntoa(list->ip.sin_addr))); + ctdb_addr_to_str(&list->addr))); + return 1; } @@ -1597,16 +1625,17 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind */ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata) { - struct sockaddr_in *ip = (struct sockaddr_in *)indata.dptr; + ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr; struct 
ctdb_control_tcp_tickle_list *list; struct ctdb_tcp_array *tcparray; int num; struct ctdb_vnn *vnn; - vnn = find_public_ip_vnn(ctdb, *ip); + vnn = find_public_ip_vnn(ctdb, addr); if (vnn == NULL) { DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", - inet_ntoa(ip->sin_addr))); + ctdb_addr_to_str(addr))); + return 1; } @@ -1625,7 +1654,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind CTDB_NO_MEMORY(ctdb, outdata->dptr); list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr; - list->ip = *ip; + list->addr = *addr; list->tickles.num = num; if (num) { memcpy(&list->tickles.connections[0], tcparray->connections, @@ -1641,7 +1670,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind */ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - struct sockaddr_in *ip, + ctdb_sock_addr *addr, struct ctdb_tcp_array *tcparray) { int ret, num; @@ -1661,7 +1690,7 @@ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, CTDB_NO_MEMORY(ctdb, data.dptr); list = (struct ctdb_control_tcp_tickle_list *)data.dptr; - list->ip = *ip; + list->addr = *addr; list->tickles.num = num; if (tcparray) { memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num); @@ -1709,8 +1738,8 @@ static void ctdb_update_tcp_tickles(struct event_context *ev, &vnn->public_address, vnn->tcp_array); if (ret != 0) { - DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n", - inet_ntoa(vnn->public_address.sin_addr))); + DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n", + ctdb_addr_to_str(&vnn->public_address))); } } @@ -1833,7 +1862,7 @@ int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA inda return -1; } - return ctdb_add_public_address(ctdb, pub->sin, pub->mask, &pub->iface[0]); + return ctdb_add_public_address(ctdb, 
&pub->addr, pub->mask, &pub->iface[0]); } /* @@ -1869,7 +1898,7 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda /* walk over all public addresses until we find a match */ for (vnn=ctdb->vnn;vnn;vnn=vnn->next) { - if (ctdb_same_ipv4(&vnn->public_address, &pub->sin)) { + if (ctdb_same_ip(&vnn->public_address, &pub->addr)) { TALLOC_CTX *mem_ctx = talloc_new(ctdb); DLIST_REMOVE(ctdb->vnn, vnn); @@ -1879,7 +1908,7 @@ int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA inda mem_ctx, delete_ip_callback, mem_ctx, "releaseip %s %s %u", vnn->iface, - inet_ntoa(vnn->public_address.sin_addr), + talloc_strdup(mem_ctx, ctdb_addr_to_str(&vnn->public_address)), vnn->public_netmask_bits); talloc_free(vnn); if (ret != 0) { diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c index b7979049c19..4dc0f74b209 100644 --- a/ctdb/server/ctdbd.c +++ b/ctdb/server/ctdbd.c @@ -271,13 +271,11 @@ int main(int argc, const char *argv[]) svnn->iface = talloc_strdup(svnn, options.public_interface); CTDB_NO_MEMORY(ctdb, svnn->iface); - if (inet_aton(options.single_public_ip, - &svnn->public_address.sin_addr) == 0) { + if (parse_ip(options.single_public_ip, + &svnn->public_address) == 0) { DEBUG(DEBUG_ALERT,("Invalid --single-public-ip argument : %s . This is not a valid ip address. 
Exiting.\n", options.single_public_ip)); exit(10); } - svnn->public_address.sin_family = AF_INET; - svnn->public_address.sin_port = 0; } if (options.public_address_list) { diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 906a665d044..0e892affe91 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -104,16 +104,11 @@ static void ctdb_node_connect_write(struct event_context *ev, struct fd_event *f static int ctdb_tcp_get_address(struct ctdb_context *ctdb, - const char *address, struct in_addr *addr) + const char *address, ctdb_sock_addr *addr) { - if (inet_pton(AF_INET, address, addr) <= 0) { - struct hostent *he = gethostbyname(address); - if (he == NULL || he->h_length > sizeof(*addr)) { - ctdb_set_error(ctdb, "invalid nework address '%s'\n", - address); - return -1; - } - memcpy(addr, he->h_addr, he->h_length); + if (parse_ip(address, addr) == 0) { + DEBUG(DEBUG_CRIT, (__location__ " Unparsable address : %s.\n", address)); + return -1; } return 0; } @@ -129,26 +124,34 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, struct ctdb_tcp_node *tnode = talloc_get_type(node->private_data, struct ctdb_tcp_node); struct ctdb_context *ctdb = node->ctdb; - struct sockaddr_in sock_in; - struct sockaddr_in sock_out; + ctdb_sock_addr sock_in; + ctdb_sock_addr sock_out; ctdb_tcp_stop_connection(node); - tnode->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - - set_nonblocking(tnode->fd); - set_close_on_exec(tnode->fd); - ZERO_STRUCT(sock_out); #ifdef HAVE_SOCK_SIN_LEN - sock_out.sin_len = sizeof(sock_out); + sock_out.ip.sin_len = sizeof(sock_out); #endif - if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out.sin_addr) != 0) { + if (ctdb_tcp_get_address(ctdb, node->address.address, &sock_out) != 0) { + return; + } + switch (sock_out.sa.sa_family) { + case AF_INET: + sock_out.ip.sin_port = htons(node->address.port); + break; + case AF_INET6: + sock_out.ip6.sin6_port = htons(node->address.port); + break; + 
default: + DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n", + sock_out.sa.sa_family)); return; } - sock_out.sin_port = htons(node->address.port); - sock_out.sin_family = PF_INET; + tnode->fd = socket(sock_out.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); + set_nonblocking(tnode->fd); + set_close_on_exec(tnode->fd); /* Bind our side of the socketpair to the same address we use to listen * on incoming CTDB traffic. @@ -158,13 +161,11 @@ void ctdb_tcp_node_connect(struct event_context *ev, struct timed_event *te, */ ZERO_STRUCT(sock_in); #ifdef HAVE_SOCK_SIN_LEN - sock_in.sin_len = sizeof(sock_in); + sock_in.ip.sin_len = sizeof(sock_in); #endif - if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in.sin_addr) != 0) { + if (ctdb_tcp_get_address(ctdb, ctdb->address.address, &sock_in) != 0) { return; } - sock_in.sin_port = htons(0); /* INPORT_ANY is not always available */ - sock_in.sin_family = PF_INET; bind(tnode->fd, (struct sockaddr *)&sock_in, sizeof(sock_in)); if (connect(tnode->fd, (struct sockaddr *)&sock_out, sizeof(sock_out)) != 0 && @@ -198,7 +199,7 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); - struct sockaddr_in addr; + ctdb_sock_addr addr; socklen_t len; int fd, nodeid; struct ctdb_incoming *in; @@ -210,7 +211,7 @@ static void ctdb_listen_event(struct event_context *ev, struct fd_event *fde, fd = accept(ctcp->listen_fd, (struct sockaddr *)&addr, &len); if (fd == -1) return; - incoming_node = inet_ntoa(addr.sin_addr); + incoming_node = ctdb_addr_to_str(&addr); nodeid = ctdb_ip_to_nodeid(ctdb, incoming_node); if (nodeid == -1) { @@ -240,10 +241,11 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) { struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); - struct sockaddr_in sock; + ctdb_sock_addr sock; int lock_fd, i; 
const char *lock_path = "/tmp/.ctdb_socket_lock"; struct flock lock; + int one = 1; /* in order to ensure that we don't get two nodes with the same adddress, we must make the bind() and listen() calls @@ -279,16 +281,37 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) ZERO_STRUCT(sock); #ifdef HAVE_SOCK_SIN_LEN - sock.sin_len = sizeof(sock); + sock.ip.sin_len = sizeof(sock); #endif - sock.sin_port = htons(ctdb->nodes[i]->address.port); - sock.sin_family = PF_INET; if (ctdb_tcp_get_address(ctdb, ctdb->nodes[i]->address.address, - &sock.sin_addr) != 0) { + &sock) != 0) { continue; } + switch (sock.sa.sa_family) { + case AF_INET: + sock.ip.sin_port = htons(ctdb->nodes[i]->address.port); + break; + case AF_INET6: + sock.ip6.sin6_port = htons(ctdb->nodes[i]->address.port); + break; + default: + DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n", + sock.sa.sa_family)); + continue; + } + + ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); + if (ctcp->listen_fd == -1) { + ctdb_set_error(ctdb, "socket failed\n"); + continue; + } + + set_close_on_exec(ctcp->listen_fd); + + setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)); + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) == 0) { break; @@ -341,19 +364,9 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb) { struct ctdb_tcp *ctcp = talloc_get_type(ctdb->private_data, struct ctdb_tcp); - struct sockaddr_in sock; + ctdb_sock_addr sock; int one = 1; - ctcp->listen_fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); - if (ctcp->listen_fd == -1) { - ctdb_set_error(ctdb, "socket failed\n"); - return -1; - } - - set_close_on_exec(ctcp->listen_fd); - - setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)); - /* we can either auto-bind to the first available address, or we can use a specified address */ if (!ctdb->address.address) { @@ -362,16 +375,36 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb) ZERO_STRUCT(sock); #ifdef 
HAVE_SOCK_SIN_LEN - sock.sin_len = sizeof(sock); + sock.ip.sin_len = sizeof(sock); #endif - sock.sin_port = htons(ctdb->address.port); - sock.sin_family = PF_INET; - if (ctdb_tcp_get_address(ctdb, ctdb->address.address, - &sock.sin_addr) != 0) { + &sock) != 0) { goto failed; } + switch (sock.sa.sa_family) { + case AF_INET: + sock.ip.sin_port = htons(ctdb->address.port); + break; + case AF_INET6: + sock.ip6.sin6_port = htons(ctdb->address.port); + break; + default: + DEBUG(DEBUG_ERR, (__location__ " unknown family %u\n", + sock.sa.sa_family)); + goto failed; + } + + ctcp->listen_fd = socket(sock.sa.sa_family, SOCK_STREAM, IPPROTO_TCP); + if (ctcp->listen_fd == -1) { + ctdb_set_error(ctdb, "socket failed\n"); + return -1; + } + + set_close_on_exec(ctcp->listen_fd); + + setsockopt(ctcp->listen_fd,SOL_SOCKET,SO_REUSEADDR,(char *)&one,sizeof(one)); + if (bind(ctcp->listen_fd, (struct sockaddr * )&sock, sizeof(sock)) != 0) { goto failed; } @@ -386,7 +419,9 @@ int ctdb_tcp_listen(struct ctdb_context *ctdb) return 0; failed: - close(ctcp->listen_fd); + if (ctcp->listen_fd != -1) { + close(ctcp->listen_fd); + } ctcp->listen_fd = -1; return -1; } diff --git a/ctdb/tests/nodes.txt b/ctdb/tests/nodes.txt index 99b07328b38..2563adcc702 100644 --- a/ctdb/tests/nodes.txt +++ b/ctdb/tests/nodes.txt @@ -1,4 +1,4 @@ -127.0.0.1 -127.0.0.2 -127.0.0.3 -127.0.0.4 +::1 +::2 +::3 +::4 diff --git a/ctdb/tests/nodes6.txt b/ctdb/tests/nodes6.txt new file mode 100644 index 00000000000..7e1d085fdf0 --- /dev/null +++ b/ctdb/tests/nodes6.txt @@ -0,0 +1,11 @@ +::1 +::2 +::3 +::4 + +::2 +::3 +::4 +::2 +::3 +::4 diff --git a/ctdb/tests/start_daemons.sh b/ctdb/tests/start_daemons.sh index afcf23e0095..cf6b738f4a1 100755 --- a/ctdb/tests/start_daemons.sh +++ b/ctdb/tests/start_daemons.sh @@ -4,12 +4,17 @@ NUMNODES=2 if [ $# -gt 0 ]; then NUMNODES=$1 fi -NODES="./tests/nodes.txt" shift +NODES="./tests/nodes.txt" rm -f $NODES for i in `seq 1 $NUMNODES`; do - echo 127.0.0.$i >> $NODES + if [ 
"${CTDB_USE_IPV6}x" != "x" ]; then + echo ::$i >> $NODES + ip addr add ::$i/128 dev lo + else + echo 127.0.0.$i >> $NODES + fi done killall -q ctdbd diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index eed92d3773d..90af2a8e1bd 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -332,7 +332,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv printf(":Node:IP:Disconnected:Banned:Disabled:Unhealthy:\n"); for(i=0;inum;i++){ printf(":%d:%s:%d:%d:%d:%d:\n", nodemap->nodes[i].pnn, - inet_ntoa(nodemap->nodes[i].sin.sin_addr), + ctdb_addr_to_str(&nodemap->nodes[i].addr), !!(nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED), !!(nodemap->nodes[i].flags&NODE_FLAGS_BANNED), !!(nodemap->nodes[i].flags&NODE_FLAGS_PERMANENTLY_DISABLED), @@ -370,7 +370,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv CTDB_NO_MEMORY_FATAL(ctdb, flags_str); } printf("pnn:%d %-16s %s%s\n", nodemap->nodes[i].pnn, - inet_ntoa(nodemap->nodes[i].sin.sin_addr), + ctdb_addr_to_str(&nodemap->nodes[i].addr), flags_str, nodemap->nodes[i].pnn == mypnn?" 
(THIS NODE)":""); talloc_free(flags_str); @@ -414,30 +414,29 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char **argv) { struct ctdb_control_tcp_tickle_list *list; - struct sockaddr_in ip; + ctdb_sock_addr addr; int i, ret; if (argc < 1) { usage(); } - ip.sin_family = AF_INET; - if (inet_aton(argv[0], &ip.sin_addr) == 0) { + if (parse_ip(argv[0], &addr) == 0) { DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0])); return -1; } - ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.pnn, ctdb, &ip, &list); + ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.pnn, ctdb, &addr, &list); if (ret == -1) { DEBUG(DEBUG_ERR, ("Unable to list tickles\n")); return -1; } - printf("Tickles for ip:%s\n", inet_ntoa(list->ip.sin_addr)); + printf("Tickles for ip:%s\n", ctdb_addr_to_str(&list->addr)); printf("Num tickles:%u\n", list->tickles.num); for (i=0;itickles.num;i++) { - printf("SRC: %s:%u ", inet_ntoa(list->tickles.connections[i].saddr.sin_addr), ntohs(list->tickles.connections[i].saddr.sin_port)); - printf("DST: %s:%u\n", inet_ntoa(list->tickles.connections[i].daddr.sin_addr), ntohs(list->tickles.connections[i].daddr.sin_port)); + printf("SRC: %s:%u ", ctdb_addr_to_str(&list->tickles.connections[i].src_addr), ntohs(list->tickles.connections[i].src_addr.ip.sin_port)); + printf("DST: %s:%u\n", ctdb_addr_to_str(&list->tickles.connections[i].dst_addr), ntohs(list->tickles.connections[i].dst_addr.ip.sin_port)); } talloc_free(list); @@ -447,7 +446,7 @@ static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char * /* send a release ip to all nodes */ static int control_send_release(struct ctdb_context *ctdb, uint32_t pnn, -struct sockaddr_in *sin) +ctdb_sock_addr *addr) { int ret; struct ctdb_public_ip pip; @@ -461,11 +460,10 @@ struct sockaddr_in *sin) } /* send a moveip message to the recovery master */ - pip.pnn = pnn; - 
pip.sin.sin_family = AF_INET; - pip.sin.sin_addr = sin->sin_addr; + pip.pnn = pnn; + pip.addr = *addr; data.dsize = sizeof(pip); - data.dptr = (unsigned char *)&pip; + data.dptr = (unsigned char *)&pip; /* send release ip to all nodes */ @@ -486,7 +484,7 @@ struct sockaddr_in *sin) static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv) { uint32_t pnn; - struct sockaddr_in ip; + ctdb_sock_addr addr; uint32_t value; struct ctdb_all_public_ips *ips; int i, ret; @@ -495,8 +493,7 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv usage(); } - ip.sin_family = AF_INET; - if (inet_aton(argv[0], &ip.sin_addr) == 0) { + if (parse_ip(argv[0], &addr) == 0) { DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0])); return -1; } @@ -535,22 +532,22 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv } for (i=0;inum;i++) { - if (ctdb_same_ipv4(&ip, &ips->ips[i].sin)) { + if (ctdb_same_ip(&addr, &ips->ips[i].addr)) { break; } } if (i==ips->num) { DEBUG(DEBUG_ERR, ("Node %u can not host ip address '%s'\n", - pnn, inet_ntoa(ip.sin_addr))); + pnn, ctdb_addr_to_str(&addr))); return -1; } if (ips->ips[i].pnn == pnn) { DEBUG(DEBUG_ERR, ("Host %u is already hosting '%s'\n", - pnn, inet_ntoa(ips->ips[i].sin.sin_addr))); + pnn, ctdb_addr_to_str(&ips->ips[i].addr))); return -1; } - ret = control_send_release(ctdb, pnn, &ips->ips[i].sin); + ret = control_send_release(ctdb, pnn, &ips->ips[i].addr); if (ret != 0) { DEBUG(DEBUG_ERR, ("Failed to send 'change ip' to all nodes\n"));; return -1; @@ -559,20 +556,15 @@ static int control_moveip(struct ctdb_context *ctdb, int argc, const char **argv return 0; } -struct node_ip { - uint32_t pnn; - struct sockaddr_in sin; -}; - void getips_store_callback(void *param, void *data) { - struct node_ip *node_ip = (struct node_ip *)data; + struct ctdb_public_ip *node_ip = (struct ctdb_public_ip *)data; struct ctdb_all_public_ips *ips = param; int i; i = 
ips->num++; - ips->ips[i].pnn = node_ip->pnn; - ips->ips[i].sin = node_ip->sin; + ips->ips[i].pnn = node_ip->pnn; + ips->ips[i].addr = node_ip->addr; } void getips_count_callback(void *param, void *data) @@ -612,13 +604,13 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc } for (j=0; jnum;j++) { - struct node_ip *node_ip; + struct ctdb_public_ip *node_ip; - node_ip = talloc(tmp_ctx, struct node_ip); - node_ip->pnn = tmp_ips->ips[j].pnn; - node_ip->sin = tmp_ips->ips[j].sin; + node_ip = talloc(tmp_ctx, struct ctdb_public_ip); + node_ip->pnn = tmp_ips->ips[j].pnn; + node_ip->addr = tmp_ips->ips[j].addr; - trbt_insert32(tree, tmp_ips->ips[j].sin.sin_addr.s_addr, node_ip); + trbt_insert32(tree, tmp_ips->ips[j].addr.ip.sin_addr.s_addr, node_ip); } talloc_free(tmp_ips); } @@ -643,7 +635,7 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc * ip address or -1 */ static int -find_other_host_for_public_ip(struct ctdb_context *ctdb, struct sockaddr_in *addr) +find_other_host_for_public_ip(struct ctdb_context *ctdb, ctdb_sock_addr *addr) { TALLOC_CTX *tmp_ctx = talloc_new(ctdb); struct ctdb_all_public_ips *ips; @@ -673,7 +665,7 @@ find_other_host_for_public_ip(struct ctdb_context *ctdb, struct sockaddr_in *add } for (j=0;jnum;j++) { - if (ctdb_same_ipv4(addr, &ips->ips[j].sin)) { + if (ctdb_same_ip(addr, &ips->ips[j].addr)) { talloc_free(tmp_ctx); return nodemap->nodes[i].pnn; } @@ -693,7 +685,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) int i, ret; int len; unsigned mask; - struct sockaddr_in addr; + ctdb_sock_addr addr; struct ctdb_control_ip_iface *pub; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); struct ctdb_all_public_ips *ips; @@ -721,7 +713,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) pub = talloc_size(tmp_ctx, len); CTDB_NO_MEMORY(ctdb, pub); - pub->sin = addr; + pub->addr = addr; pub->mask = mask; pub->len = strlen(argv[1])+1; 
memcpy(&pub->iface[0], argv[1], strlen(argv[1])+1); @@ -738,7 +730,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) * we will claim it */ for (i=0;inum;i++) { - if (ctdb_same_ipv4(&addr, &ips->ips[i].sin)) { + if (ctdb_same_ip(&addr, &ips->ips[i].addr)) { break; } } @@ -764,7 +756,7 @@ static int control_addip(struct ctdb_context *ctdb, int argc, const char **argv) static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv) { int i, ret; - struct sockaddr_in addr; + ctdb_sock_addr addr; struct ctdb_control_ip_iface pub; TALLOC_CTX *tmp_ctx = talloc_new(ctdb); struct ctdb_all_public_ips *ips; @@ -774,13 +766,12 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv) usage(); } - addr.sin_family = AF_INET; - if (inet_aton(argv[0], &addr.sin_addr) == 0) { + if (parse_ip(argv[0], &addr) == 0) { DEBUG(DEBUG_ERR,("Wrongly formed ip address '%s'\n", argv[0])); return -1; } - pub.sin = addr; + pub.addr = addr; pub.mask = 0; pub.len = 0; @@ -792,14 +783,14 @@ static int control_delip(struct ctdb_context *ctdb, int argc, const char **argv) } for (i=0;inum;i++) { - if (ctdb_same_ipv4(&addr, &ips->ips[i].sin)) { + if (ctdb_same_ip(&addr, &ips->ips[i].addr)) { break; } } if (i==ips->num) { DEBUG(DEBUG_ERR, ("This node does not support this public address '%s'\n", - inet_ntoa(addr.sin_addr))); + ctdb_addr_to_str(&addr))); talloc_free(tmp_ctx); return -1; } @@ -837,12 +828,12 @@ static int kill_tcp(struct ctdb_context *ctdb, int argc, const char **argv) usage(); } - if (!parse_ip_port(argv[0], (ctdb_sock_addr *)&killtcp.src)) { + if (!parse_ip_port(argv[0], &killtcp.src_addr)) { DEBUG(DEBUG_ERR, ("Bad IP:port '%s'\n", argv[0])); return -1; } - if (!parse_ip_port(argv[1], (ctdb_sock_addr *)&killtcp.dst)) { + if (!parse_ip_port(argv[1], &killtcp.dst_addr)) { DEBUG(DEBUG_ERR, ("Bad IP:port '%s'\n", argv[1])); return -1; } @@ -1052,9 +1043,9 @@ static int control_ip(struct ctdb_context *ctdb, int 
argc, const char **argv) for (i=1;i<=ips->num;i++) { if (options.machinereadable){ - printf(":%s:%d:\n", inet_ntoa(ips->ips[ips->num-i].sin.sin_addr), ips->ips[ips->num-i].pnn); + printf(":%s:%d:\n", ctdb_addr_to_str(&ips->ips[ips->num-i].addr), ips->ips[ips->num-i].pnn); } else { - printf("%s %d\n", inet_ntoa(ips->ips[ips->num-i].sin.sin_addr), ips->ips[ips->num-i].pnn); + printf("%s %d\n", ctdb_addr_to_str(&ips->ips[ips->num-i].addr), ips->ips[ips->num-i].pnn); } } @@ -1316,7 +1307,8 @@ static int control_lvs(struct ctdb_context *ctdb, int argc, const char **argv) } } - printf("%d:%s\n", i, inet_ntoa(nodemap->nodes[i].sin.sin_addr)); + printf("%d:%s\n", i, + ctdb_addr_to_str(&nodemap->nodes[i].addr)); } return 0; @@ -2194,7 +2186,7 @@ static int control_listnodes(struct ctdb_context *ctdb, int argc, const char **a } for(i=0;inum;i++){ - printf("%s\n", inet_ntoa(nodemap->nodes[i].sin.sin_addr)); + printf("%s\n", ctdb_addr_to_str(&nodemap->nodes[i].addr)); } return 0; diff --git a/ctdb/utils/ipmux/ipmux.c b/ctdb/utils/ipmux/ipmux.c index 539490e674d..598e9dcb6f2 100644 --- a/ctdb/utils/ipmux/ipmux.c +++ b/ctdb/utils/ipmux/ipmux.c @@ -34,7 +34,7 @@ struct ipmux_node { uint32_t pnn; - struct sockaddr_in sin; + ctdb_sock_addr addr; }; struct ipmux_node *ipmux_nodes; @@ -188,8 +188,8 @@ int main(int argc, const char *argv[]) if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) { continue; } - ipmux_nodes[num_nodes].pnn = i; - ipmux_nodes[num_nodes].sin = nodemap->nodes[i].sin; + ipmux_nodes[num_nodes].pnn = i; + ipmux_nodes[num_nodes].addr = nodemap->nodes[i].addr; num_nodes++; } @@ -251,7 +251,7 @@ int main(int argc, const char *argv[]) send the packet off and tell the kernel to not worry about this packet any more */ - ret = sendto(s, &ipqp->payload[0], ipqp->data_len, 0, &ipmux_nodes[hash].sin, sizeof(struct sockaddr_in)); + ret = sendto(s, &ipqp->payload[0], ipqp->data_len, 0, (struct sockaddr_in *)&ipmux_nodes[hash].addr, sizeof(ctdb_sock_addr)); 
ipq_set_verdict(ipqh, ipqp->packet_id, NF_DROP, 0, pktbuf); } From 342408d75e47775661486b4824076bcb49980869 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 19 Aug 2008 14:58:57 +1000 Subject: [PATCH 70/90] remove a file we don't need Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit bf6effef0cc9e5f2eeeb38fce855a90624a76239) --- ctdb/tests/nodes6.txt | 11 ----------- 1 file changed, 11 deletions(-) delete mode 100644 ctdb/tests/nodes6.txt diff --git a/ctdb/tests/nodes6.txt b/ctdb/tests/nodes6.txt deleted file mode 100644 index 7e1d085fdf0..00000000000 --- a/ctdb/tests/nodes6.txt +++ /dev/null @@ -1,11 +0,0 @@ -::1 -::2 -::3 -::4 - -::2 -::3 -::4 -::2 -::3 -::4 From 37234887d9325b7d0a70c11c50bf5ddbad2eadb7 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 19 Aug 2008 18:24:08 +1000 Subject: [PATCH 71/90] fix the ipv6 checksum calculation for pseudoheader so that it actually works; add support to send ipv6 "gratuitous arp" aka neighbor solicitation packets from ctdb Signed-off-by: Ronnie Sahlberg (This used to be ctdb commit 0a38ea11af9237501f2951fee698a59b46f8750d) --- ctdb/common/system_linux.c | 158 ++++++++++++++++++++++++++----------- ctdb/tests/nodes.txt | 3 - 2 files changed, 112 insertions(+), 49 deletions(-) diff --git a/ctdb/common/system_linux.c b/ctdb/common/system_linux.c index 760877fe3ad..c7e66eb9761 100644 --- a/ctdb/common/system_linux.c +++ b/ctdb/common/system_linux.c @@ -26,9 +26,58 @@ #include "lib/events/events.h" #include #include +#include #include +#ifndef ETHERTYPE_IP6 +#define ETHERTYPE_IP6 0x86dd +#endif + +/* + uint16 checksum for n bytes + */ +static uint32_t uint16_checksum(uint16_t *data, size_t n) +{ + uint32_t sum=0; + while (n>=2) { + sum += (uint32_t)ntohs(*data); + data++; + n -= 2; + } + if (n == 1) { + sum += (uint32_t)ntohs(*(uint8_t *)data); + } + return sum; +} + +/* + calculate the tcp checksum for tcp over ipv6 +*/ +static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6) +{ +
uint32_t phdr[2]; + uint32_t sum = 0; + uint16_t sum2; + + sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16); + sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16); + + phdr[0] = htonl(n); + phdr[1] = htonl(ip6->ip6_nxt); + sum += uint16_checksum((uint16_t *)phdr, 8); + + sum += uint16_checksum(data, n); + + sum = (sum & 0xFFFF) + (sum >> 16); + sum = (sum & 0xFFFF) + (sum >> 16); + sum2 = htons(sum); + sum2 = ~sum2; + if (sum2 == 0) { + return 0xFFFF; + } + return sum2; +} /* send gratuitous arp reply after we have taken over an ip address @@ -42,8 +91,10 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) struct sockaddr sa; struct ether_header *eh; struct arphdr *ah; + struct ip6_hdr *ip6; + struct icmp6_hdr *icmp6; struct ifreq if_hwaddr; - unsigned char buffer[64]; /*minimum eth frame size */ + unsigned char buffer[78]; /* ipv6 neigh solicitation size */ char *ptr; ZERO_STRUCT(sa); @@ -129,10 +180,68 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) return -1; } + close(s); + break; + case AF_INET6: + s = socket(AF_INET, SOCK_PACKET, htons(ETHERTYPE_IP6)); + if (s == -1){ + DEBUG(DEBUG_CRIT,(__location__ " failed to open raw socket\n")); + return -1; + } + + /* get the mac address */ + strcpy(if_hwaddr.ifr_name, iface); + ret = ioctl(s, SIOCGIFHWADDR, &if_hwaddr); + if ( ret < 0 ) { + close(s); + DEBUG(DEBUG_CRIT,(__location__ " ioctl failed\n")); + return -1; + } + if (ARPHRD_LOOPBACK == if_hwaddr.ifr_hwaddr.sa_family) { + DEBUG(DEBUG_DEBUG,("Ignoring loopback arp request\n")); + close(s); + return 0; + } + if (if_hwaddr.ifr_hwaddr.sa_family != AF_LOCAL) { + close(s); + errno = EINVAL; + DEBUG(DEBUG_CRIT,(__location__ " not an ethernet address family (0x%x)\n", + if_hwaddr.ifr_hwaddr.sa_family)); + return -1; + } + + memset(buffer, 0 , sizeof(buffer)); + eh = (struct ether_header *)buffer; + memset(eh->ether_dhost, 0xff, ETH_ALEN); + memcpy(eh->ether_shost, if_hwaddr.ifr_hwaddr.sa_data, 
ETH_ALEN); + eh->ether_type = htons(ETHERTYPE_IP6); + + ip6 = (struct ip6_hdr *)(eh+1); + ip6->ip6_vfc = 0x60; + ip6->ip6_plen = htons(24); + ip6->ip6_nxt = IPPROTO_ICMPV6; + ip6->ip6_hlim = 255; + ip6->ip6_dst = addr->ip6.sin6_addr; + + icmp6 = (struct icmp6_hdr *)(ip6+1); + icmp6->icmp6_type = ND_NEIGHBOR_SOLICIT; + icmp6->icmp6_code = 0; + memcpy(&icmp6->icmp6_data32[1], &addr->ip6.sin6_addr, 16); + + icmp6->icmp6_cksum = tcp_checksum6((uint16_t *)icmp6, ntohs(ip6->ip6_plen), ip6); + + strncpy(sa.sa_data, iface, sizeof(sa.sa_data)); + ret = sendto(s, buffer, 78, 0, &sa, sizeof(sa)); + if (ret < 0 ){ + close(s); + DEBUG(DEBUG_CRIT,(__location__ " failed sendto\n")); + return -1; + } + close(s); break; default: - DEBUG(DEBUG_CRIT,(__location__ " not an ipv4 address (family is %u)\n", addr->ip.sin_family)); + DEBUG(DEBUG_CRIT,(__location__ " not an ipv4/ipv6 address (family is %u)\n", addr->ip.sin_family)); return -1; } @@ -140,23 +249,6 @@ int ctdb_sys_send_arp(const ctdb_sock_addr *addr, const char *iface) } -/* - uint16 checksum for n bytes - */ -static uint32_t uint16_checksum(uint16_t *data, size_t n) -{ - uint32_t sum=0; - while (n>=2) { - sum += (uint32_t)ntohs(*data); - data++; - n -= 2; - } - if (n == 1) { - sum += (uint32_t)ntohs(*(uint8_t *)data); - } - return sum; -} - /* simple TCP checksum - assumes data is multiple of 2 bytes long */ @@ -179,29 +271,6 @@ static uint16_t tcp_checksum(uint16_t *data, size_t n, struct iphdr *ip) return sum2; } -/* - calculate the tcp checksum for tcp over ipv6 -*/ -static uint16_t tcp_checksum6(uint16_t *data, size_t n, struct ip6_hdr *ip6) -{ - uint32_t sum = uint16_checksum(data, n); - uint16_t sum2; - - sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_src, 16); - sum += uint16_checksum((uint16_t *)(void *)&ip6->ip6_dst, 16); - sum += ip6->ip6_plen; - sum += ip6->ip6_nxt; - - sum = (sum & 0xFFFF) + (sum >> 16); - sum = (sum & 0xFFFF) + (sum >> 16); - sum2 = htons(sum); - sum2 = ~sum2; - if (sum2 == 0) { - return 
0xFFFF; - } - return sum2; -} - /* Send tcp segment from the specified IP/port to the specified destination IP/port. @@ -285,7 +354,7 @@ int ctdb_sys_send_tcp(const ctdb_sock_addr *dest, case AF_INET6: ZERO_STRUCT(ip6pkt); ip6pkt.ip6.ip6_vfc = 0x60; - ip6pkt.ip6.ip6_plen = 20; + ip6pkt.ip6.ip6_plen = htons(20); ip6pkt.ip6.ip6_nxt = IPPROTO_TCP; ip6pkt.ip6.ip6_hlim = 64; ip6pkt.ip6.ip6_src = src->ip6.sin6_addr; @@ -451,9 +520,6 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, *seq = tcp->seq; return 0; -#ifndef ETHERTYPE_IP6 -#define ETHERTYPE_IP6 0x86dd -#endif } else if (ntohs(eth->ether_type) == ETHERTYPE_IP6) { /* IP6 */ ip6 = (struct ip6_hdr *)(eth+1); diff --git a/ctdb/tests/nodes.txt b/ctdb/tests/nodes.txt index 2563adcc702..20e29b14832 100644 --- a/ctdb/tests/nodes.txt +++ b/ctdb/tests/nodes.txt @@ -1,4 +1 @@ ::1 -::2 -::3 -::4 From 8e17e75eac461805447e0927cccd7f290d68648f Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 09:23:31 +1000 Subject: [PATCH 72/90] fix a bug in the tcp socketkiller for ipv6 (This used to be ctdb commit 83735951352a243da185031e4853e7e40c43a0fb) --- ctdb/common/system_linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/common/system_linux.c b/ctdb/common/system_linux.c index c7e66eb9761..1bf49033306 100644 --- a/ctdb/common/system_linux.c +++ b/ctdb/common/system_linux.c @@ -538,7 +538,7 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, src->ip6.sin6_addr = ip6->ip6_src; dst->ip6.sin6_family = AF_INET6; - dst->ip6.sin6_port = tcp->source; + dst->ip6.sin6_port = tcp->dest; dst->ip6.sin6_addr = ip6->ip6_dst; *ack_seq = tcp->ack_seq; From 43536648c57a5f0da35ff8c60a12b25638662763 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 09:47:00 +1000 Subject: [PATCH 73/90] update the socketkiller in the eventscripts to be able to handle ipv6 (This used to be ctdb commit 6da7b36b7ccc4ee9b809867ea32036f09a801bb3) --- ctdb/config/functions | 13 ++++++++----- 1 
file changed, 8 insertions(+), 5 deletions(-) diff --git a/ctdb/config/functions b/ctdb/config/functions index 5df121f7310..36f0e9e05e0 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -224,11 +224,12 @@ kill_tcp_connections() { _killcount=0 connfile="$CTDB_BASE/state/connections.$_IP" netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile + while read dest src; do - srcip=`echo $src | cut -d: -f1` - srcport=`echo $src | cut -d: -f2` - destip=`echo $dest | cut -d: -f1` - destport=`echo $dest | cut -d: -f2` + srcip=`echo $src | sed -e "s/:[^:]*$//"` + srcport=`echo $src | sed -e "s/^.*://"` + destip=`echo $dest | sed -e "s/:[^:]*$//"` + destport=`echo $dest | sed -e "s/^.*://"` echo "Killing TCP connection $srcip:$srcport $destip:$destport" ctdb killtcp $srcip:$srcport $destip:$destport >/dev/null 2>&1 || _failed=1 case $destport in @@ -240,8 +241,9 @@ kill_tcp_connections() { ;; esac _killcount=`expr $_killcount + 1` - done < $connfile + done < $connfile /bin/rm -f $connfile + [ $_failed = 0 ] || { echo "Failed to send killtcp control" return; @@ -352,3 +354,4 @@ startstop_nfslock() { [ -x $CTDB_BASE/rc.local ] && { . 
$CTDB_BASE/rc.local } + From da1c17bf46d266a57c5a37f752ebc6dd34c47c2b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 11:52:36 +1000 Subject: [PATCH 74/90] when we compare ip addresses in ctdb_same_ip we must first canonicalize the addresses so that we realize that 127.0.0.1:22 is really the same thing as ::ffff:127.0.0.1:22 Downgrade all AF_INET6 ::ffff:xxxx:xxxx sockaddresses into AF_INET ones (This used to be ctdb commit b0fe4c45fc5ba1ecf62ebb921092c8a34e28a2bd) --- ctdb/common/ctdb_util.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 20238363b60..53e5d87df89 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -502,21 +502,47 @@ bool parse_ip_mask(const char *str, ctdb_sock_addr *addr, unsigned *mask) return ret; } -bool ctdb_same_ip(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2) +/* + This is used to canonicalize a ctdb_sock_addr structure. 
+*/ +static void canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip) { - if (ip1->sa.sa_family != ip2->sa.sa_family) { + char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff }; + + memcpy(cip, ip, sizeof (*cip)); + + if ( (ip->sa.sa_family == AF_INET6) + && !memcmp(&ip->ip6.sin6_addr, prefix, 12)) { + memset(cip, 0, sizeof(*cip)); +#ifdef HAVE_SOCK_SIN_LEN + cip->ip.sin_len = sizeof(*cip); +#endif + cip->ip.sin_family = AF_INET; + cip->ip.sin_port = ip->ip6.sin6_port; + memcpy(&cip->ip.sin_addr, &ip->ip6.sin6_addr.s6_addr32[3], 4); + } +} + +bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2) +{ + ctdb_sock_addr ip1, ip2; + + canonicalize_ip(tip1, &ip1); + canonicalize_ip(tip2, &ip2); + + if (ip1.sa.sa_family != ip2.sa.sa_family) { return false; } - switch (ip1->sa.sa_family) { + switch (ip1.sa.sa_family) { case AF_INET: - return ip1->ip.sin_addr.s_addr == ip2->ip.sin_addr.s_addr; + return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr; case AF_INET6: - return !memcmp(&ip1->ip6.sin6_addr.s6_addr[0], - &ip2->ip6.sin6_addr.s6_addr[0], + return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0], + &ip2.ip6.sin6_addr.s6_addr[0], 16); default: - DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1->sa.sa_family)); + DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family)); return false; } From 5193caec6da1a90a0cb1e52d751361c477f6f0c8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 11:58:27 +1000 Subject: [PATCH 75/90] make the function to canonicalize a sockaddr structure public (This used to be ctdb commit 1157d61a0bc557d8ffc453c518dfc48473492bfd) --- ctdb/common/ctdb_util.c | 6 +++--- ctdb/include/ctdb.h | 1 - ctdb/include/ctdb_private.h | 2 ++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 53e5d87df89..792ff3c79d2 100644 --- a/ctdb/common/ctdb_util.c +++ 
b/ctdb/common/ctdb_util.c @@ -505,7 +505,7 @@ bool parse_ip_mask(const char *str, ctdb_sock_addr *addr, unsigned *mask) /* This is used to canonicalize a ctdb_sock_addr structure. */ -static void canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip) +void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip) { char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff }; @@ -527,8 +527,8 @@ bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2) { ctdb_sock_addr ip1, ip2; - canonicalize_ip(tip1, &ip1); - canonicalize_ip(tip2, &ip2); + ctdb_canonicalize_ip(tip1, &ip1); + ctdb_canonicalize_ip(tip2, &ip2); if (ip1.sa.sa_family != ip2.sa.sa_family) { return false; diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index c6f257433b3..d43ab507070 100644 --- a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -566,5 +566,4 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, TDB_DATA key, TDB_DATA data); int ctdb_transaction_commit(struct ctdb_transaction_handle *h); - #endif diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index f73f9ef963f..8834ec4d52c 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -1376,5 +1376,7 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, struct ctdb_req_control *c); char *ctdb_addr_to_str(ctdb_sock_addr *addr); +void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip); + #endif From eb23d7b6d4b5fc18e45342030d302e3f7f93b7ed Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 12:02:54 +1000 Subject: [PATCH 76/90] we must canonicalize the sockaddr structures in killtcp so that we do the necessary downgrade if required (This used to be ctdb commit 2f8b33948e395228cbac3450c0c684e49069abf0) --- ctdb/server/ctdb_takeover.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 54581909961..b33471ddaf2 100644 --- 
a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -1453,21 +1453,25 @@ static void *add_killtcp_callback(void *parm, void *data) add a tcp socket to the list of connections we want to RST */ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, - ctdb_sock_addr *src, - ctdb_sock_addr *dst) + ctdb_sock_addr *s, + ctdb_sock_addr *d) { + ctdb_sock_addr src, dst; struct ctdb_kill_tcp *killtcp; struct ctdb_killtcp_con *con; struct ctdb_vnn *vnn; - vnn = find_public_ip_vnn(ctdb, dst); + ctdb_canonicalize_ip(s, &src); + ctdb_canonicalize_ip(d, &dst); + + vnn = find_public_ip_vnn(ctdb, &dst); if (vnn == NULL) { - vnn = find_public_ip_vnn(ctdb, src); + vnn = find_public_ip_vnn(ctdb, &src); } if (vnn == NULL) { /* if it is not a public ip it could be our 'single ip' */ if (ctdb->single_ip_vnn) { - if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, dst)) { + if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) { vnn = ctdb->single_ip_vnn; } } @@ -1502,8 +1506,8 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, */ con = talloc(killtcp, struct ctdb_killtcp_con); CTDB_NO_MEMORY(ctdb, con); - con->src_addr = *src; - con->dst_addr = *dst; + con->src_addr = src; + con->dst_addr = dst; con->count = 0; con->killtcp = killtcp; From 9ce657b044e15b77096addfa5b635ea3055d417e Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Aug 2008 12:50:50 +1000 Subject: [PATCH 77/90] When we harvest all tcp connections to kill off after a takeip/releaseip event we must also harvest the ipv4 connections which may be presented in ::ffff:xxxx:xxxx form by netstat (This used to be ctdb commit 293d12a40501320a21efaf592b8f20e8590a5197) --- ctdb/config/functions | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/config/functions b/ctdb/config/functions index 36f0e9e05e0..2c744d0c068 100644 --- a/ctdb/config/functions +++ b/ctdb/config/functions @@ -224,6 +224,7 @@ kill_tcp_connections() { _killcount=0
connfile="$CTDB_BASE/state/connections.$_IP" netstat -tn |egrep "^tcp.*[[:space:]]+$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' > $connfile + netstat -tn |egrep "^tcp.*[[:space:]]+::ffff:$_IP:.*ESTABLISHED" | awk '{print $4" "$5}' >> $connfile while read dest src; do srcip=`echo $src | sed -e "s/:[^:]*$//"` From d83fc7e3894a5485b748d04df253d2ed42ba80a8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 22 Aug 2008 09:09:08 +1000 Subject: [PATCH 78/90] when we collect all ip addresses and sort them for the "ctdb ip -n all" output we must look at more than just the first 4 bytes of the sockaddr address or ipv6 wont work (This used to be ctdb commit 4dfbfb4618433d9ed79ca1bdb1e2e51d96d4ee62) --- ctdb/tests/nodes.txt | 2 +- ctdb/tools/ctdb.c | 43 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ctdb/tests/nodes.txt b/ctdb/tests/nodes.txt index 20e29b14832..7b9ad531d26 100644 --- a/ctdb/tests/nodes.txt +++ b/ctdb/tests/nodes.txt @@ -1 +1 @@ -::1 +127.0.0.1 diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 90af2a8e1bd..8aa934d3fc2 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -574,12 +574,42 @@ void getips_count_callback(void *param, void *data) (*count)++; } +#define IP_KEYLEN 4 +static uint32_t *ip_key(ctdb_sock_addr *ip) +{ + static uint32_t key[IP_KEYLEN]; + + bzero(key, sizeof(key)); + + switch (ip->sa.sa_family) { + case AF_INET: + key[0] = ip->ip.sin_addr.s_addr; + break; + case AF_INET6: + key[0] = ip->ip6.sin6_addr.s6_addr32[3]; + key[1] = ip->ip6.sin6_addr.s6_addr32[2]; + key[2] = ip->ip6.sin6_addr.s6_addr32[1]; + key[3] = ip->ip6.sin6_addr.s6_addr32[0]; + break; + default: + DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family)); + return key; + } + + return key; +} + +static void *add_ip_callback(void *parm, void *data) +{ + return parm; +} + static int control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struct 
ctdb_all_public_ips **ips) { struct ctdb_all_public_ips *tmp_ips; struct ctdb_node_map *nodemap=NULL; - trbt_tree_t *tree; + trbt_tree_t *ip_tree; int i, j, len, ret; uint32_t count; @@ -589,7 +619,7 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc return ret; } - tree = trbt_create(tmp_ctx, 0); + ip_tree = trbt_create(tmp_ctx, 0); for(i=0;inum;i++){ if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) { @@ -610,19 +640,22 @@ control_get_all_public_ips(struct ctdb_context *ctdb, TALLOC_CTX *tmp_ctx, struc node_ip->pnn = tmp_ips->ips[j].pnn; node_ip->addr = tmp_ips->ips[j].addr; - trbt_insert32(tree, tmp_ips->ips[j].addr.ip.sin_addr.s_addr, node_ip); + trbt_insertarray32_callback(ip_tree, + IP_KEYLEN, ip_key(&tmp_ips->ips[j].addr), + add_ip_callback, + node_ip); } talloc_free(tmp_ips); } /* traverse */ count = 0; - trbt_traversearray32(tree, 1, getips_count_callback, &count); + trbt_traversearray32(ip_tree, IP_KEYLEN, getips_count_callback, &count); len = offsetof(struct ctdb_all_public_ips, ips) + count*sizeof(struct ctdb_public_ip); tmp_ips = talloc_zero_size(tmp_ctx, len); - trbt_traversearray32(tree, 1, getips_store_callback, tmp_ips); + trbt_traversearray32(ip_tree, IP_KEYLEN, getips_store_callback, tmp_ips); *ips = tmp_ips; From ddf2de215416dfe0253db0a21cc4e05ff838bc93 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 22 Aug 2008 09:25:47 +1000 Subject: [PATCH 79/90] Do not fail the takeip event if the "ip addr add ..." command failed. Let the event complete successfully. The local recovery daemon will check that we have the address and reissue takeip otherwise. There are several reasons why "ip addr add " can fail: one is a misconfiguration, another is that for ipv6 the stack is a lot more picky than for ipv4. For example, this WILL fail in ipv6 if there is a duplicate ip address on the network. 
thus this check could cause rolling-recoveries which is why it has to go (This used to be ctdb commit 12bc85c90a640a72ff538c003eb81da9dd1f2e3f) --- ctdb/config/events.d/10.interface | 1 - 1 file changed, 1 deletion(-) diff --git a/ctdb/config/events.d/10.interface b/ctdb/config/events.d/10.interface index 15e55618d15..4d070777269 100755 --- a/ctdb/config/events.d/10.interface +++ b/ctdb/config/events.d/10.interface @@ -59,7 +59,6 @@ case $cmd in } /sbin/ip addr add $ip/$maskbits dev $iface || { echo "Failed to add $ip/$maskbits on dev $iface" - exit 1 } # cope with the script being killed while we have the interface blocked iptables -D INPUT -i $iface -d $ip -j DROP 2> /dev/null From c1be066611cafcab00389906d642564a62603142 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 25 Aug 2008 08:52:29 +1000 Subject: [PATCH 80/90] version 1.0.57 : initial ipv6 support (This used to be ctdb commit 7da0c65c8526d66d4f2a788bd646d39237befa54) --- ctdb/packaging/RPM/ctdb.spec | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index 93b9d3fbff3..e0abc7c519a 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 56 +Release: 57 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,8 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Aug 25 2008 : Version 1.0.57 + - initial support for IPv6 * Mon Aug 11 2008 : Version 1.0.56 - fix a memory leak in the recovery daemon. 
* Mon Aug 11 2008 : Version 1.0.55 From 413b28bfddce5654779024dd378b14cc31f80957 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 25 Aug 2008 09:41:08 +1000 Subject: [PATCH 81/90] add a link to my webpage (This used to be ctdb commit 0709093af11e0735ed1e13b06ac073990631e692) --- ctdb/web/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/web/index.html b/ctdb/web/index.html index 4008f46298b..5b6cd42e84e 100644 --- a/ctdb/web/index.html +++ b/ctdb/web/index.html @@ -129,7 +129,7 @@ projects that want to make their services cluster aware using CTDB.

Developers

From b99a88f0b354f49aa7be716a18e1665f8f3a8ca8 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 25 Aug 2008 10:03:16 +1000 Subject: [PATCH 82/90] Add a "reload" option to the initscript. (This used to be ctdb commit 2a8bf5e7dc7364a8280d96db0f9579d2582a8524) --- ctdb/config/ctdb.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/config/ctdb.init b/ctdb/config/ctdb.init index 72de84d0824..2fe1185e312 100755 --- a/ctdb/config/ctdb.init +++ b/ctdb/config/ctdb.init @@ -188,7 +188,7 @@ case "$1" in stop) stop ;; - restart) + restart|reload) restart ;; status) From 18b10d400dc9a18ed9a04c8d81fa5030a2bbdb54 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 25 Aug 2008 10:13:18 +1000 Subject: [PATCH 83/90] From Abhijith Das : Fixup the initscript so it passes rpm-lint (This used to be ctdb commit f84d0a9a8c7e9589e8833f21e1f977a0adab356b) --- ctdb/config/ctdb.init | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ctdb/config/ctdb.init b/ctdb/config/ctdb.init index 2fe1185e312..eb56ef615db 100755 --- a/ctdb/config/ctdb.init +++ b/ctdb/config/ctdb.init @@ -1,21 +1,21 @@ #!/bin/sh # ############################## -# init info for redhat distros -# chkconfig: - 90 36 -# description: Starts and stops the clustered tdb daemon -# pidfile: /var/run/ctdbd/ctdbd.pid -############################## +# ctdb: Starts the clustered tdb daemon +# +# chkconfig: - 90 36 +# +# description: Starts and stops the clustered tdb daemon +# pidfile: /var/run/ctdbd/ctdbd.pid +# -############################## -# SLES/OpenSuSE init info ### BEGIN INIT INFO -# Provides: ctdb -# Required-Start: $network +# Provides: ctdb +# Required-Start: $network # Required-Stop: -# Default-Start: 3 5 # Default-Stop: -# Description: initscript for the ctdb service +# Short-Description: start and stop ctdb service +# Description: initscript for the ctdb service ### END INIT INFO # Source function library. 
From a35fa0aa8f72ce5ef473d2d1244b13e5eae88670 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 27 Aug 2008 10:24:35 +1000 Subject: [PATCH 84/90] rename ctdb_tcp_client back to the original name ctdb_control_tcp (This used to be ctdb commit 4d1c0418cfe6170bc081684dbe45908a5d285f0b) --- ctdb/include/ctdb_private.h | 2 +- ctdb/server/ctdb_control.c | 2 +- ctdb/server/ctdb_takeover.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 8834ec4d52c..a25674c9b4b 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -565,7 +565,7 @@ struct ctdb_control_set_call { struct for tcp_client control used by samba can not modify */ -struct ctdb_tcp_client { +struct ctdb_control_tcp { struct sockaddr_in src; // samba uses this struct sockaddr_in dest;// samba uses this }; diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 68635d6b439..41287978667 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -296,7 +296,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_get_public_ips(ctdb, c, outdata); case CTDB_CONTROL_TCP_CLIENT: - CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_tcp_client)); + CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_tcp)); return ctdb_control_tcp_client(ctdb, client_id, indata); case CTDB_CONTROL_STARTUP: diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index b33471ddaf2..fb20ff17969 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -956,7 +956,7 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata) { struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); - struct ctdb_tcp_client *p = (struct ctdb_tcp_client *)indata.dptr; + struct ctdb_control_tcp *p = (struct ctdb_control_tcp *)indata.dptr; struct ctdb_tcp_list *tcp; struct ctdb_control_tcp_vnn t; 
int ret; From 589dcbe8cb16a02dcb22716f17804080a12f1a70 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 27 Aug 2008 10:26:34 +1000 Subject: [PATCH 85/90] new version 1.0.58 (This used to be ctdb commit c4bc4317df4106e81b238dbfaf861ca6104304c1) --- ctdb/packaging/RPM/ctdb.spec | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ctdb/packaging/RPM/ctdb.spec b/ctdb/packaging/RPM/ctdb.spec index e0abc7c519a..070f96737aa 100644 --- a/ctdb/packaging/RPM/ctdb.spec +++ b/ctdb/packaging/RPM/ctdb.spec @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team Name: ctdb Version: 1.0 -Release: 57 +Release: 58 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -118,6 +118,10 @@ fi %{_includedir}/ctdb_private.h %changelog +* Wed Aug 27 2008 : Version 1.0.58 + - revert the name change tcp_tcp_client back to tcp_control_tcp so + samba can build. + - Updates to the init script from Abhijith Das * Mon Aug 25 2008 : Version 1.0.57 - initial support for IPv6 * Mon Aug 11 2008 : Version 1.0.56 From 70c7525a02079c429bdc9fa5278fd9bc8ffee879 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 29 Aug 2008 12:26:02 +1000 Subject: [PATCH 86/90] zero out the address structure to keep valgrind happy (This used to be ctdb commit 8060e591b0eb2d184b5a7444487477225d2e1dbf) --- ctdb/server/ctdb_takeover.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index fb20ff17969..8bdf3082018 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -965,6 +965,7 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, struct ctdb_vnn *vnn; ctdb_sock_addr addr; + ZERO_STRUCT(addr); addr.ip = p->dest; vnn = find_public_ip_vnn(ctdb, &addr); if (vnn == NULL) { From 7a78a78a1c0de5ef3f470326eee2c1ca7e57e607 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Mon, 8 Sep 2008 08:57:42 +1000 Subject: [PATCH 87/90] From C Cowan. 
Patch to make AIX compile with the new ipv6 additions. (This used to be ctdb commit e26ce5140ed005725f8b7ac8ba23a180fd7d5337) --- ctdb/common/system_aix.c | 45 ++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/ctdb/common/system_aix.c b/ctdb/common/system_aix.c index 03d997ff6fc..8fe630d8a39 100644 --- a/ctdb/common/system_aix.c +++ b/ctdb/common/system_aix.c @@ -199,7 +199,7 @@ bool ctdb_sys_have_ip(ctdb_sock_addr *addr) int s; int ret; - addr->sa.sa_port = 0; + addr->ip.sin_port = 0; s = socket(addr->sa.sa_family, SOCK_STREAM, IPPROTO_TCP); if (s == -1) { return false; @@ -312,6 +312,7 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, int ret; struct ether_header *eth; struct ip *ip; + struct ip6_hdr *ip6; struct tcphdr *tcp; struct ctdb_killtcp_connection *conn; struct pcap_pkthdr pkthdr; @@ -353,22 +354,44 @@ int ctdb_sys_read_tcp_packet(int s, void *private_data, tcp = (struct tcphdr *)((ip->ip_hl*4) + (char *)ip); /* tell the caller which one we've found */ - src->ip.sin_family = AF_INET; - src->sin_addr.s_addr = ip->ip_src.s_addr; - src->sin_port = tcp->th_sport; - dst->ip.sin_family = AF_INET; - dst->sin_addr.s_addr = ip->ip_dst.s_addr; - dst->sin_port = tcp->th_dport; - *ack_seq = tcp->th_ack; - *seq = tcp->th_seq; + src->ip.sin_family = AF_INET; + src->ip.sin_addr.s_addr = ip->ip_src.s_addr; + src->ip.sin_port = tcp->th_sport; + dst->ip.sin_family = AF_INET; + dst->ip.sin_addr.s_addr = ip->ip_dst.s_addr; + dst->ip.sin_port = tcp->th_dport; + *ack_seq = tcp->th_ack; + *seq = tcp->th_seq; return 0; #ifndef ETHERTYPE_IP6 #define ETHERTYPE_IP6 0x86dd #endif - } else if (eth->ether_type == htons(ETHERTYPE_IP)) { -see system_linux.c for what should go in here + } else if (eth->ether_type == htons(ETHERTYPE_IP6)) { + /* IP6 */ + ip6 = (struct ip6_hdr *)(eth+1); + + /* we only want TCP */ + if (ip6->ip6_nxt != IPPROTO_TCP) { + return -1; + } + + /* TCP */ + tcp = (struct tcphdr *)(ip6+1); + + /* tell 
the caller which one we've found */ + src->ip6.sin6_family = AF_INET6; + src->ip6.sin6_port = tcp->th_sport; + src->ip6.sin6_addr = ip6->ip6_src; + + dst->ip6.sin6_family = AF_INET6; + dst->ip6.sin6_port = tcp->th_dport; + dst->ip6.sin6_addr = ip6->ip6_dst; + + *ack_seq = tcp->th_ack; + *seq = tcp->th_seq; + return 0; } From 6474f3278d33107bee8fb499d71c2a682c8c3403 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 9 Sep 2008 13:44:46 +1000 Subject: [PATCH 88/90] additional monitoring between the two daemons. we currently only monitor that the daemons are running by kill(0, pid) and verifying the domain socket between them is ok. this is not sufficient since we can have a situation where the recovery daemon is hung. this new code monitors that the recovery daemon is operating. if the recovery hangs, we log this and shut down the main daemon (This used to be ctdb commit cd69d292292eaab3aac0e9d9fc57cb621597c63c) --- ctdb/client/ctdb_client.c | 18 ++++++++++++++++++ ctdb/include/ctdb.h | 2 ++ ctdb/include/ctdb_private.h | 4 ++++ ctdb/server/ctdb_control.c | 4 ++++ ctdb/server/ctdb_daemon.c | 3 +++ ctdb/server/ctdb_recover.c | 38 +++++++++++++++++++++++++++++++++++++ ctdb/server/ctdb_recoverd.c | 3 +++ ctdb/server/ctdb_tunables.c | 1 + 8 files changed, 73 insertions(+) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index dfcd4d90e79..6d80efc2053 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3280,3 +3280,21 @@ again: talloc_free(h); return 0; } + +/* + recovery daemon ping to main daemon + */ +int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb) +{ + int ret; + int32_t res; + + ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null, + ctdb, NULL, &res, NULL, NULL); + if (ret != 0 || res != 0) { + DEBUG(DEBUG_ERR,("Failed to send recd ping\n")); + return -1; + } + + return 0; +} diff --git a/ctdb/include/ctdb.h b/ctdb/include/ctdb.h index d43ab507070..60fa60be589 100644 --- 
a/ctdb/include/ctdb.h +++ b/ctdb/include/ctdb.h @@ -566,4 +566,6 @@ int ctdb_transaction_store(struct ctdb_transaction_handle *h, TDB_DATA key, TDB_DATA data); int ctdb_transaction_commit(struct ctdb_transaction_handle *h); +int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb); + #endif diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index a25674c9b4b..b2ded310b51 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -114,6 +114,7 @@ struct ctdb_tunable { uint32_t reclock_ping_period; uint32_t no_ip_failback; uint32_t verbose_memory_names; + uint32_t recd_ping_timeout; }; /* @@ -417,6 +418,7 @@ struct ctdb_context { int start_as_disabled; uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */ TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */ + TALLOC_CTX *recd_ping_ctx; }; struct ctdb_db_context { @@ -550,6 +552,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_TRANS2_FINISHED = 84, CTDB_CONTROL_TRANS2_ERROR = 85, CTDB_CONTROL_TRANS2_COMMIT_RETRY = 86, + CTDB_CONTROL_RECD_PING = 87, }; /* @@ -1378,5 +1381,6 @@ int32_t ctdb_control_trans2_error(struct ctdb_context *ctdb, char *ctdb_addr_to_str(ctdb_sock_addr *addr); void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip); +int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb); #endif diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 41287978667..94736fb568a 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -406,6 +406,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, case CTDB_CONTROL_TRANS2_FINISHED: return ctdb_control_trans2_finished(ctdb, c); + case CTDB_CONTROL_RECD_PING: + CHECK_CONTROL_DATA_SIZE(0); + return ctdb_control_recd_ping(ctdb); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git 
a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index efe3d753498..885ce7e6f63 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -103,6 +103,9 @@ static void ctdb_start_transport(struct ctdb_context *ctdb) /* start periodic update of tcp tickle lists */ ctdb_start_tcp_tickle_update(ctdb); + + /* start listening for recovery daemon pings */ + ctdb_control_recd_ping(ctdb); } static void block_signal(int signum) diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 3243f42faa7..6b207d55bc6 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -971,3 +971,41 @@ int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outda return 0; } +static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p) +{ + struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context); + + DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Shutting down ctdb daemon\n")); + + ctdb_stop_recoverd(ctdb); + ctdb_stop_keepalive(ctdb); + ctdb_stop_monitoring(ctdb); + ctdb_release_all_ips(ctdb); + if (ctdb->methods != NULL) { + ctdb->methods->shutdown(ctdb); + } + ctdb_event_script(ctdb, "shutdown"); + DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n")); + exit(0); +} + +/* The recovery daemon will ping us at regular intervals. + If we havent been pinged for a while we assume the recovery + daemon is inoperable and we shut down. 
+*/ +int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb) +{ + talloc_free(ctdb->recd_ping_ctx); + + ctdb->recd_ping_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_ctx); + + if (ctdb->tunable.recd_ping_timeout != 0) { + event_add_timed(ctdb->ev, ctdb->recd_ping_ctx, + timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0), + ctdb_recd_ping_timeout, ctdb); + } + + return 0; +} + diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index c6a4ab322a3..a8c004ae0c5 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2317,6 +2317,9 @@ again: exit(-1); } + /* ping the local daemon to tell it we are alive */ + ctdb_ctrl_recd_ping(ctdb); + if (rec->election_timeout) { /* an election is in progress */ goto again; diff --git a/ctdb/server/ctdb_tunables.c b/ctdb/server/ctdb_tunables.c index d138137afdc..de3e46667cc 100644 --- a/ctdb/server/ctdb_tunables.c +++ b/ctdb/server/ctdb_tunables.c @@ -50,6 +50,7 @@ static const struct { { "ReclockPingPeriod", 60, offsetof(struct ctdb_tunable, reclock_ping_period) }, { "NoIPFailback", 0, offsetof(struct ctdb_tunable, no_ip_failback) }, { "VerboseMemoryNames", 0, offsetof(struct ctdb_tunable, verbose_memory_names) }, + { "RecdPingTimeout", 60, offsetof(struct ctdb_tunable, recd_ping_timeout) }, }; /* From 3bedb7f6d1e59a7886b05cd96a8eccf817720616 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 9 Sep 2008 13:55:31 +1000 Subject: [PATCH 89/90] lower the debug level for when printing that the nodeflags have changed (This used to be ctdb commit a89977f8cb2463a87147dcc0ad936cb5d4131670) --- ctdb/server/ctdb_recoverd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index a8c004ae0c5..47539427a05 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -2633,7 +2633,7 @@ again: } if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) != (nodemap->nodes[i].flags & 
NODE_FLAGS_INACTIVE)) { - DEBUG(DEBUG_ERR, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", + DEBUG(DEBUG_WARNING, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", nodemap->nodes[j].pnn, i, remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags)); do_recovery(rec, mem_ctx, pnn, nodemap, From 348cad7bc1c319d19e9c36efa72cb3a2490f31dc Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 9 Sep 2008 13:59:48 +1000 Subject: [PATCH 90/90] lower the debuglevel when logging unknown idr in responses (This used to be ctdb commit a72f5b7d1560e427e18b1c55a2932a7fb037f4c7) --- ctdb/common/ctdb_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 792ff3c79d2..cc68291c0e0 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -147,7 +147,7 @@ void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *ty p = _idr_find_type(ctdb->idr, (reqid>>16)&0xFFFF, type, location); if (p == NULL) { - DEBUG(DEBUG_ERR, ("Could not find idr:%u\n",reqid)); + DEBUG(DEBUG_WARNING, ("Could not find idr:%u\n",reqid)); } return p;