From 12ebb74838ded09420f087e555c6efffef6db98b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 4 Sep 2007 09:50:07 +1000 Subject: [PATCH] change how we do public addresses and takeover so that we can have multiple public addresses spread across multiple interfaces on each node. this is a massive patch since we have previously made the assumtion that we only have one public address per node. get rid of the public_interface argument. the public addresses file now explicitely lists which interface the address belongs to (This used to be ctdb commit 462ebbc791e906a6b874c862defea43235597ca8) --- ctdb/client/ctdb_client.c | 9 +- ctdb/config/ctdb.init | 1 - ctdb/config/ctdb.sysconfig | 13 +- ctdb/config/events.d/10.interface | 24 +- ctdb/doc/ctdb.1 | 6 +- ctdb/doc/ctdb.1.html | 2 +- ctdb/doc/ctdb.1.xml | 2 +- ctdb/doc/ctdbd.1 | 33 +- ctdb/doc/ctdbd.1.html | 59 +-- ctdb/doc/ctdbd.1.xml | 52 +-- ctdb/include/ctdb_private.h | 69 +-- ctdb/server/ctdb_control.c | 2 +- ctdb/server/ctdb_monitor.c | 10 +- ctdb/server/ctdb_recover.c | 2 +- ctdb/server/ctdb_recoverd.c | 128 +++--- ctdb/server/ctdb_server.c | 8 +- ctdb/server/ctdb_takeover.c | 730 ++++++++++++++++++------------ ctdb/server/ctdbd.c | 8 - ctdb/server/eventscript.c | 12 +- ctdb/tcp/tcp_connect.c | 2 +- ctdb/tools/ctdb.c | 26 +- ctdb/web/configuring.html | 50 +- 22 files changed, 674 insertions(+), 574 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 85c55971342..cdec93d804b 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -1540,7 +1540,7 @@ uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb, for (i=0;inum;i++) { if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) { - nodes[*num_nodes] = map->nodes[i].vnn; + nodes[*num_nodes] = map->nodes[i].pnn; (*num_nodes)++; } } @@ -2300,15 +2300,16 @@ int ctdb_ctrl_killtcp(struct ctdb_context *ctdb, */ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, 
- TALLOC_CTX *mem_ctx, uint32_t vnn, + TALLOC_CTX *mem_ctx, + struct sockaddr_in *ip, struct ctdb_control_tcp_tickle_list **list) { int ret; TDB_DATA data, outdata; int32_t status; - data.dptr = (uint8_t*)&vnn; - data.dsize = sizeof(vnn); + data.dptr = (uint8_t*)ip; + data.dsize = sizeof(struct sockaddr_in); ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data, diff --git a/ctdb/config/ctdb.init b/ctdb/config/ctdb.init index ffe56241acb..fb4314a0808 100755 --- a/ctdb/config/ctdb.init +++ b/ctdb/config/ctdb.init @@ -52,7 +52,6 @@ CTDB_OPTIONS="$CTDB_OPTIONS --reclock=$CTDB_RECOVERY_LOCK" [ -z "$CTDB_NODES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --nlist=$CTDB_NODES" [ -z "$CTDB_SOCKET" ] || CTDB_OPTIONS="$CTDB_OPTIONS --socket=$CTDB_SOCKET" [ -z "$CTDB_PUBLIC_ADDRESSES" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-addresses=$CTDB_PUBLIC_ADDRESSES" -[ -z "$CTDB_PUBLIC_INTERFACE" ] || CTDB_OPTIONS="$CTDB_OPTIONS --public-interface=$CTDB_PUBLIC_INTERFACE" [ -z "$CTDB_DBDIR" ] || CTDB_OPTIONS="$CTDB_OPTIONS --dbdir=$CTDB_DBDIR" [ -z "$CTDB_EVENT_SCRIPT_DIR" ] || CTDB_OPTIONS="$CTDB_OPTIONS --event-script-dir $CTDB_EVENT_SCRIPT_DIR" [ -z "$CTDB_TRANSPORT" ] || CTDB_OPTIONS="$CTDB_OPTIONS --transport $CTDB_TRANSPORT" diff --git a/ctdb/config/ctdb.sysconfig b/ctdb/config/ctdb.sysconfig index 5116b040f48..23a861170fc 100644 --- a/ctdb/config/ctdb.sysconfig +++ b/ctdb/config/ctdb.sysconfig @@ -5,17 +5,16 @@ # there is no default # CTDB_RECOVERY_LOCK="/some/place/on/shared/storage" -# should ctdb do IP takeover? If it should, then specify a file +# Should ctdb do IP takeover? If it should, then specify a file # containing the list of public IP addresses that ctdb will manage # Note that these IPs must be different from those in $NODES above -# there is no default +# there is no default. 
+# The syntax is one line per public address of the form : +# / +# Example: 10.1.1.1/24 eth0 +# # CTDB_PUBLIC_ADDRESSES=/etc/ctdb/public_addresses -# when doing IP takeover you also must specify what network interface -# to use for the public addresses -# there is no default -# CTDB_PUBLIC_INTERFACE=eth0 - # should ctdb manage starting/stopping the Samba service for you? # default is to not manage Samba # CTDB_MANAGES_SAMBA=yes diff --git a/ctdb/config/events.d/10.interface b/ctdb/config/events.d/10.interface index ce9fdc6465d..fa01f5ba837 100755 --- a/ctdb/config/events.d/10.interface +++ b/ctdb/config/events.d/10.interface @@ -11,13 +11,14 @@ loadconfig ctdb cmd="$1" shift -[ -z "$CTDB_PUBLIC_INTERFACE" ] && { - [ "$cmd" = "startup" ] && { - echo "Event script $0 : CTDB_PUBLIC_INTERFACE not set. Nothing to do." - } - exit 0 +[ -z "$CTDB_PUBLIC_ADDRESSES" ] && { + CTDB_PUBLIC_ADDRESSES=/etc/ctdb/public_addresses } +[ ! -f "$CTDB_PUBLIC_ADDRESSES" ] && { + echo "No public addresses file found. 
Nothing to do for 10.interfaces" + exit 0 +} case $cmd in ############################# @@ -84,13 +85,18 @@ case $cmd in monitor) [ -x /usr/sbin/ethtool ] && { - /usr/sbin/ethtool $CTDB_PUBLIC_INTERFACE | grep 'Link detected: yes' > /dev/null || { - echo "`date` ERROR: No link on the public network interface $CTDB_PUBLIC_INTERFACE" - exit 1 - } + cat $CTDB_PUBLIC_ADDRESSES | sed -e "s/^[^\t ]*[\t ]*//" -e "s/[\t ]*$//" | sort | uniq | while read IFACE; do + /usr/sbin/ethtool $IFACE | grep 'Link detected: yes' > /dev/null || { + echo "`date` ERROR: No link on the public network interface $IFACE" + exit 1 + } + done } ;; esac exit 0 + + + diff --git a/ctdb/doc/ctdb.1 b/ctdb/doc/ctdb.1 index a39de9b6ca7..127e9407803 100644 --- a/ctdb/doc/ctdb.1 +++ b/ctdb/doc/ctdb.1 @@ -1,11 +1,11 @@ .\" Title: ctdb .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 -.\" Date: 08/23/2007 +.\" Date: 09/03/2007 .\" Manual: .\" Source: .\" -.TH "CTDB" "1" "08/23/2007" "" "" +.TH "CTDB" "1" "09/03/2007" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -180,7 +180,7 @@ Example output: .sp .RS 3n .nf -Number of nodes:4 +Number of addresses:4 12.1.1.1 0 12.1.1.2 1 12.1.1.3 2 diff --git a/ctdb/doc/ctdb.1.html b/ctdb/doc/ctdb.1.html index 23dfda09da7..e3c665f6a5b 100644 --- a/ctdb/doc/ctdb.1.html +++ b/ctdb/doc/ctdb.1.html @@ -89,7 +89,7 @@ response from 3 time=0.000114 sec (2 clients)

Example output:

-Number of nodes:4
+Number of addresses:4
 12.1.1.1         0
 12.1.1.2         1
 12.1.1.3         2
diff --git a/ctdb/doc/ctdb.1.xml b/ctdb/doc/ctdb.1.xml
index 3f92f510957..bcb6646d684 100644
--- a/ctdb/doc/ctdb.1.xml
+++ b/ctdb/doc/ctdb.1.xml
@@ -228,7 +228,7 @@ response from 3 time=0.000114 sec  (2 clients)
 	Example output:
       
       
-Number of nodes:4
+Number of addresses:4
 12.1.1.1         0
 12.1.1.2         1
 12.1.1.3         2
diff --git a/ctdb/doc/ctdbd.1 b/ctdb/doc/ctdbd.1
index d853126e6e7..ac83b3944a0 100644
--- a/ctdb/doc/ctdbd.1
+++ b/ctdb/doc/ctdbd.1
@@ -1,11 +1,11 @@
 .\"     Title: ctdbd
 .\"    Author: 
 .\" Generator: DocBook XSL Stylesheets v1.71.0 
-.\"      Date: 08/23/2007
+.\"      Date: 09/03/2007
 .\"    Manual: 
 .\"    Source: 
 .\"
-.TH "CTDBD" "1" "08/23/2007" "" ""
+.TH "CTDBD" "1" "09/03/2007" "" ""
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -16,7 +16,7 @@ ctdbd \- The CTDB cluster daemon
 .HP 6
 \fBctdbd\fR
 .HP 6
-\fBctdbd\fR {\-\-reclock=} {\-\-nlist=} {\-\-dbdir=} [\-?\ \-\-help] [\-\-usage] [\-i\ \-\-interactive] [\-\-public\-addresses=] [\-\-public\-interface=] [\-\-event\-script=] [\-\-logfile=] [\-\-listen=
] [\-\-transport=] [\-\-socket=] [\-d\ \-\-debug=] [\-\-torture] +\fBctdbd\fR {\-\-reclock=} {\-\-nlist=} {\-\-dbdir=} [\-?\ \-\-help] [\-\-usage] [\-i\ \-\-interactive] [\-\-public\-addresses=] [\-\-event\-script=] [\-\-logfile=] [\-\-listen=
] [\-\-transport=] [\-\-socket=] [\-d\ \-\-debug=] [\-\-torture] .SH "DESCRIPTION" .PP ctdbd is the main ctdb daemon. @@ -66,16 +66,11 @@ By default ctdbd will detach itself from the shell and run in the background as .PP \-\-public_addresses= .RS 3n -When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster. +When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains a list of ip addresses netmasks and interfaces. When ctdb is operational it iwll distribute these public ip addresses evenly across the availabel nodes. .sp This is usually the file /etc/ctdb/public_addresses .RE .PP -\-\-public\-interface= -.RS 3n -When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to. -.RE -.PP \-\-event\-script= .RS 3n This option is used to specify which events script that ctdbd will use to manage services when the cluster configuration changes. @@ -122,7 +117,7 @@ You do NOT want to use this option unless you are developing and testing new fun .RE .SH "PRIVATE VS PUBLIC ADDRESSES" .PP -When used for ip takeover in a HA environment, each node in a ctdb cluster has two ip addresses assigned to it. One private and one public. +When used for ip takeover in a HA environment, each node in a ctdb cluster has multiple ip addresses assigned to it. One private and one or more public. .SS "Private address" .PP This is the physical ip address of the node which is configured in linux and attached to a physical interface. This address uniquely identifies a physical node in the cluster and is the ip addresses that ctdbd will use to communicate with the ctdbd daemons on the other nodes in the cluster. 
@@ -149,28 +144,28 @@ Since the private addresses are only available to the network when the correspon .PP A public address on the other hand is not attached to an interface. This address is managed by ctdbd itself and is attached/detached to a physical node at runtime. You should NOT have this address configured to an interface in linux. Let ctdbd manage these addresses. .PP -The ctdb cluster will assign/reassign these public addresses across the available nodes in the cluster. When one node fails, its public address will be migrated to and taken over by a different node in the cluster to ensure that all public addresses are always available to clients. +The ctdb cluster will assign/reassign these public addresses across the available healthy nodes in the cluster. When one node fails, its public address will be migrated to and taken over by a different node in the cluster to ensure that all public addresses are always available to clients. .PP These addresses are not physically attached to a specific node. The 'ctdb ip' command can be used to view the current assignment of public addresses and which physical node is currently serving it. .PP -By default, each node will when operational always serve its primary public address which is the corresponding line for that node number in the public addresses file. I.e. as long as node X is available and fully oprational it will always be the node that serves the corresponding public address. -.PP -The list of public addresses also contain the netmask for that address. the reason for this is because ctdbd needs to know which mask to use when it adds/removes the address from a physical node. This netmask is also used by ctdbd when making decisions on which node should take over a public ip address for a failed node. A node will only be allowed to take over a public address from a different node IFF that public address resides in the same subnet as the primary public address for that node. 
+The list of public addresses also contain the netmask and the interface where this address should be attached. Example /etc/ctdb/public_addresses for a four node cluster: .sp .RS 3n .nf - 11.1.1.1/24 - 11.1.1.2/24 - 11.1.2.1/24 - 11.1.2.2/24 + 11.1.1.1/24 eth0 + 11.1.1.2/24 eth0 + 11.1.2.1/24 eth1 + 11.1.2.2/24 eth1 .fi .RE .PP -In this example, if node 3 fails, its public address can be taken over by node 2 since node 2 is on the same subnet as 3 but not by node 0 or node 1 since node 0 and 1 are both on a different subnet from node 3. +In this example, two nodes in the cluster will serve 11.1.1.1 and 11.1.1.2 through interface eth0 and two (possibly other) nodes will serve 11.1.2.1 and 11.1.2.2 through eth1. +.PP +The public address file must be the same on all nodes. Since this file also specifies which interface the address should be attached to it is imporant that all nodes use the same naming convention for interfaces. .SH "NODE STATUS" .PP The current status of each node in the cluster can be viewed by the 'ctdb status' command. diff --git a/ctdb/doc/ctdbd.1.html b/ctdb/doc/ctdbd.1.html index 526aff350e1..b600785f891 100644 --- a/ctdb/doc/ctdbd.1.html +++ b/ctdb/doc/ctdbd.1.html @@ -1,4 +1,4 @@ -ctdbd

Name

ctdbd — The CTDB cluster daemon

Synopsis

ctdbd

ctdbd {--reclock=<filename>} {--nlist=<filename>} {--dbdir=<directory>} [-? --help] [--usage] [-i --interactive] [--public-addresses=<filename>] [--public-interface=<interface>] [--event-script=<filename>] [--logfile=<filename>] [--listen=<address>] [--transport=<STRING>] [--socket=<filename>] [-d --debug=<INTEGER>] [--torture]

DESCRIPTION

+ctdbd

Name

ctdbd — The CTDB cluster daemon

Synopsis

ctdbd

ctdbd {--reclock=<filename>} {--nlist=<filename>} {--dbdir=<directory>} [-? --help] [--usage] [-i --interactive] [--public-addresses=<filename>] [--event-script=<filename>] [--logfile=<filename>] [--listen=<address>] [--transport=<STRING>] [--socket=<filename>] [-d --debug=<INTEGER>] [--torture]

DESCRIPTION

ctdbd is the main ctdb daemon.

ctdbd provides a clustered version of the TDB database with automatic rebuild/recovery of the databases upon nodefailures. @@ -8,7 +8,7 @@ ctdbd provides monitoring of all nodes in the cluster and automatically reconfigures the cluster and recovers upon node failures.

ctdbd is the main component in clustered Samba that provides a high-awailability load-sharing CIFS server cluster. -

OPTIONS

-? --help

+

OPTIONS

-? --help

Print some help text to the screen.

--usage

Print useage information to the screen. @@ -28,11 +28,9 @@ By default ctdbd will detach itself from the shell and run in the background as a daemon. This option makes ctdbd to start in interactive mode.

--public_addresses=<filename>

- When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster. + When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains a list of ip addresses, netmasks and interfaces. When ctdb is operational it will distribute these public ip addresses evenly across the available nodes.

This is usually the file /etc/ctdb/public_addresses -

--public-interface=<interface>

- When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to.

--event-script=<filename>

This option is used to specify which events script that ctdbd will use to manage services when the cluster configuration changes. @@ -58,10 +56,10 @@ This option is only used for development and testing of ctdbd. It adds artificial errors and failures to the common codepaths in ctdbd to verify that ctdbd can recover correctly for failures.

You do NOT want to use this option unless you are developing and testing new functionality in ctdbd. -

Private vs Public addresses

+

Private vs Public addresses

When used for ip takeover in a HA environment, each node in a ctdb - cluster has two ip addresses assigned to it. One private and one public. -

Private address

+ cluster has multiple ip addresses assigned to it. One private and one or more public. +

Private address

This is the physical ip address of the node which is configured in linux and attached to a physical interface. This address uniquely identifies a physical node in the cluster and is the ip addresses @@ -89,14 +87,14 @@ 10.1.1.2 10.1.1.3 10.1.1.4 -

Public address

+

Public address

A public address on the other hand is not attached to an interface. This address is managed by ctdbd itself and is attached/detached to a physical node at runtime. You should NOT have this address configured to an interface in linux. Let ctdbd manage these addresses.

The ctdb cluster will assign/reassign these public addresses across the - available nodes in the cluster. When one node fails, its public address + available healthy nodes in the cluster. When one node fails, its public address will be migrated to and taken over by a different node in the cluster to ensure that all public addresses are always available to clients.

@@ -104,32 +102,25 @@ The 'ctdb ip' command can be used to view the current assignment of public addresses and which physical node is currently serving it.

- By default, each node will when operational always serve its primary - public address which is the corresponding line for that node number - in the public addresses file. I.e. as long as node X is available and - fully oprational it will always be the node that serves the - corresponding public address. -

- The list of public addresses also contain the netmask for that address. - the reason for this is because ctdbd needs to know which mask to use - when it adds/removes the address from a physical node. This netmask - is also used by ctdbd when making decisions on which node should take - over a public ip address for a failed node. - A node will only be allowed to take over a public address from a - different node IFF that public address resides in the same subnet - as the primary public address for that node. + The list of public addresses also contains the netmask and the + interface where this address should be attached.

Example /etc/ctdb/public_addresses for a four node cluster:
-        11.1.1.1/24
-        11.1.1.2/24
-        11.1.2.1/24
-        11.1.2.2/24
+        11.1.1.1/24 eth0
+        11.1.1.2/24 eth0
+        11.1.2.1/24 eth1
+        11.1.2.2/24 eth1
       

- In this example, if node 3 fails, its public address can be taken over - by node 2 since node 2 is on the same subnet as 3 but not by node 0 or - node 1 since node 0 and 1 are both on a different subnet from node 3. -

Node status

+ In this example, two nodes in the cluster will serve 11.1.1.1 and + 11.1.1.2 through interface eth0 and two (possibly other) nodes will + serve 11.1.2.1 and 11.1.2.2 through eth1. +

+ The public address file must be the same on all nodes. + Since this file also specifies which interface the address should be + attached to it is important that all nodes use the same naming convention + for interfaces. +

Node status

The current status of each node in the cluster can be viewed by the 'ctdb status' command.

@@ -160,10 +151,10 @@ investigated and require an administrative action to rectify. This node does not perticipate in the CTDB cluster but can still be communicated with. I.e. ctdb commands can be sent to it. -

SEE ALSO

+

SEE ALSO

ctdb(1), onnode(1) http://ctdb.samba.org/ -

COPYRIGHT/LICENSE


+

COPYRIGHT/LICENSE


Copyright (C) Andrew Tridgell 2007
Copyright (C) Ronnie sahlberg 2007

diff --git a/ctdb/doc/ctdbd.1.xml b/ctdb/doc/ctdbd.1.xml index a1352af47d8..fdda489a574 100644 --- a/ctdb/doc/ctdbd.1.xml +++ b/ctdb/doc/ctdbd.1.xml @@ -27,7 +27,6 @@ --usage -i --interactive --public-addresses=<filename> - --public-interface=<interface> --event-script=<filename> --logfile=<filename> --listen=<address> @@ -122,7 +121,7 @@ --public_addresses=<filename> - When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains one entry for each node in the cluster. + When used with IP takeover this specifies a file containing the public ip addresses to use on the cluster. This file contains a list of ip addresses netmasks and interfaces. When ctdb is operational it iwll distribute these public ip addresses evenly across the availabel nodes. This is usually the file /etc/ctdb/public_addresses @@ -130,14 +129,6 @@ - --public-interface=<interface> - - - When used with IP takeover this option specifies which physical interface should be used to attach the public addresses to. - - - - --event-script=<filename> @@ -216,7 +207,7 @@ Private vs Public addresses When used for ip takeover in a HA environment, each node in a ctdb - cluster has two ip addresses assigned to it. One private and one public. + cluster has multiple ip addresses assigned to it. One private and one or more public. Private address @@ -262,7 +253,7 @@ The ctdb cluster will assign/reassign these public addresses across the - available nodes in the cluster. When one node fails, its public address + available healthy nodes in the cluster. When one node fails, its public address will be migrated to and taken over by a different node in the cluster to ensure that all public addresses are always available to clients. @@ -272,33 +263,26 @@ public addresses and which physical node is currently serving it. 
- By default, each node will when operational always serve its primary - public address which is the corresponding line for that node number - in the public addresses file. I.e. as long as node X is available and - fully oprational it will always be the node that serves the - corresponding public address. - - - The list of public addresses also contain the netmask for that address. - the reason for this is because ctdbd needs to know which mask to use - when it adds/removes the address from a physical node. This netmask - is also used by ctdbd when making decisions on which node should take - over a public ip address for a failed node. - A node will only be allowed to take over a public address from a - different node IFF that public address resides in the same subnet - as the primary public address for that node. + The list of public addresses also contain the netmask and the + interface where this address should be attached. Example /etc/ctdb/public_addresses for a four node cluster: - 11.1.1.1/24 - 11.1.1.2/24 - 11.1.2.1/24 - 11.1.2.2/24 + 11.1.1.1/24 eth0 + 11.1.1.2/24 eth0 + 11.1.2.1/24 eth1 + 11.1.2.2/24 eth1 - In this example, if node 3 fails, its public address can be taken over - by node 2 since node 2 is on the same subnet as 3 but not by node 0 or - node 1 since node 0 and 1 are both on a different subnet from node 3. + In this example, two nodes in the cluster will serve 11.1.1.1 and + 11.1.1.2 through interface eth0 and two (possibly other) nodes will + serve 11.1.2.1 and 11.1.2.2 through eth1. + + + The public address file must be the same on all nodes. + Since this file also specifies which interface the address should be + attached to it is imporant that all nodes use the same naming convention + for interfaces. 
diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index 516e3111c39..251c7dd75a6 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -53,7 +53,7 @@ struct ctdb_tcp_wire_array { /* the list of tcp tickles used by get/set tcp tickle list */ struct ctdb_control_tcp_tickle_list { - uint32_t vnn; + struct sockaddr_in ip; struct ctdb_tcp_wire_array tickles; }; @@ -136,6 +136,35 @@ struct ctdb_client { }; +/* state associated with a public ip address */ +struct ctdb_vnn { + struct ctdb_vnn *next; + + struct ctdb_vnn_list *vnn_list; + + const char *public_address; + uint8_t public_netmask_bits; + + /* the node number that is serving this public address, if any. + If no node serves this ip it is set to -1 */ + int32_t pnn; + + /* List of clients to tickle for this public address */ + struct ctdb_tcp_array *tcp_array; + + /* whether we need to update the other nodes with changes to our list + of connected clients */ + bool tcp_update_needed; +}; + +struct ctdb_vnn_list { + struct ctdb_vnn_list *next; + const char *iface; + uint32_t num_ips; + struct ctdb_vnn *vnn; + struct ctdb_kill_tcp *killtcp; +}; + /* state associated with one node */ @@ -144,7 +173,7 @@ struct ctdb_node { struct ctdb_address address; const char *name; /* for debug messages */ void *private_data; /* private to transport */ - uint32_t vnn; + uint32_t pnn; #define NODE_FLAGS_DISCONNECTED 0x00000001 /* node isn't connected */ #define NODE_FLAGS_UNHEALTHY 0x00000002 /* monitoring says node is unhealthy */ #define NODE_FLAGS_PERMANENTLY_DISABLED 0x00000004 /* administrator has disabled node */ @@ -161,21 +190,6 @@ struct ctdb_node { /* a list of controls pending to this node, so we can time them out quickly if the node becomes disconnected */ struct daemon_control_state *pending_controls; - - /* the public address of this node, if known */ - const char *public_address; - uint8_t public_netmask_bits; - - /* the node number that has taken over this nodes public 
address, if any. - If not taken over, then set to -1 */ - int32_t takeover_vnn; - - /* List of clients to tickle for this public address */ - struct ctdb_tcp_array *tcp_array; - - /* whether we need to update the other nodes with changes to our list - of connected clients */ - bool tcp_update_needed; }; /* @@ -299,14 +313,6 @@ enum ctdb_freeze_mode {CTDB_FREEZE_NONE, CTDB_FREEZE_PENDING, CTDB_FREEZE_FROZEN #define CTDB_MONITORING_ACTIVE 0 #define CTDB_MONITORING_DISABLED 1 -/* information about IP takeover */ -struct ctdb_takeover { - bool enabled; - const char *interface; - const char *event_script_dir; - TALLOC_CTX *last_ctx; -}; - /* main state of the ctdb daemon */ struct ctdb_context { struct event_context *ev; @@ -332,6 +338,7 @@ struct ctdb_context { struct idr_context *idr; uint16_t idr_cnt; struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */ + struct ctdb_vnn_list *vnn_list; /* list of public ip addresses and interfaces */ char *err_msg; const struct ctdb_methods *methods; /* transport methods */ const struct ctdb_upcalls *upcalls; /* transport upcalls */ @@ -344,12 +351,12 @@ struct ctdb_context { uint32_t num_clients; uint32_t recovery_master; struct ctdb_call_state *pending_calls; - struct ctdb_takeover takeover; struct ctdb_client_ip *client_ip_list; bool do_setsched; void *saved_scheduler_param; - struct ctdb_kill_tcp *killtcp; struct _trbt_tree_t *server_ids; + const char *event_script_dir; + TALLOC_CTX *takeover_ctx; }; struct ctdb_db_context { @@ -503,7 +510,6 @@ struct ctdb_control_killtcp { struct for tcp_add and tcp_remove controls */ struct ctdb_control_tcp_vnn { - uint32_t vnn; struct sockaddr_in src; struct sockaddr_in dest; }; @@ -954,7 +960,7 @@ struct ctdb_control_list_tunable { status */ struct ctdb_node_and_flags { - uint32_t vnn; + uint32_t pnn; uint32_t flags; struct sockaddr_in sin; @@ -1031,8 +1037,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, bool *async_reply); struct ctdb_public_ip { - 
uint32_t vnn; - uint32_t takeover_vnn; + uint32_t pnn; struct sockaddr_in sin; }; int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout, @@ -1122,7 +1127,7 @@ int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, - uint32_t vnn, + struct sockaddr_in *ip, struct ctdb_control_tcp_tickle_list **list); diff --git a/ctdb/server/ctdb_control.c b/ctdb/server/ctdb_control.c index 326b8edca66..deecdd44627 100644 --- a/ctdb/server/ctdb_control.c +++ b/ctdb/server/ctdb_control.c @@ -289,7 +289,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, return ctdb_control_kill_tcp(ctdb, indata); case CTDB_CONTROL_GET_TCP_TICKLE_LIST: - CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); + CHECK_CONTROL_DATA_SIZE(sizeof(struct sockaddr_in)); return ctdb_control_get_tcp_tickle_list(ctdb, indata, outdata); case CTDB_CONTROL_SET_TCP_TICKLE_LIST: diff --git a/ctdb/server/ctdb_monitor.c b/ctdb/server/ctdb_monitor.c index 444f6e1f3c6..e56b525279d 100644 --- a/ctdb/server/ctdb_monitor.c +++ b/ctdb/server/ctdb_monitor.c @@ -43,7 +43,7 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve /* send a keepalive to all other nodes, unless */ for (i=0;inum_nodes;i++) { struct ctdb_node *node = ctdb->nodes[i]; - if (node->vnn == ctdb->vnn) { + if (node->pnn == ctdb->vnn) { continue; } @@ -65,9 +65,9 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve node->rx_cnt = 0; if (node->dead_count >= ctdb->tunable.keepalive_limit) { - DEBUG(0,("dead count reached for node %u\n", node->vnn)); + DEBUG(0,("dead count reached for node %u\n", node->pnn)); ctdb_node_dead(node); - ctdb_send_keepalive(ctdb, node->vnn); + ctdb_send_keepalive(ctdb, node->pnn); /* maybe tell the transport layer to kill the sockets as well? 
*/ @@ -75,8 +75,8 @@ static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_eve } if (node->tx_cnt == 0) { - DEBUG(5,("sending keepalive to %u\n", node->vnn)); - ctdb_send_keepalive(ctdb, node->vnn); + DEBUG(5,("sending keepalive to %u\n", node->pnn)); + ctdb_send_keepalive(ctdb, node->pnn); } node->tx_cnt = 0; diff --git a/ctdb/server/ctdb_recover.c b/ctdb/server/ctdb_recover.c index 82338d48ce0..f639ee47ec2 100644 --- a/ctdb/server/ctdb_recover.c +++ b/ctdb/server/ctdb_recover.c @@ -162,7 +162,7 @@ ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA ind node_map->num = num_nodes; for (i=0; inodes[i]->address.address, &node_map->nodes[i].sin.sin_addr); - node_map->nodes[i].vnn = ctdb->nodes[i]->vnn; + node_map->nodes[i].pnn = ctdb->nodes[i]->pnn; node_map->nodes[i].flags = ctdb->nodes[i]->flags; } diff --git a/ctdb/server/ctdb_recoverd.c b/ctdb/server/ctdb_recoverd.c index 64bb0cf5193..c1a2a866f5b 100644 --- a/ctdb/server/ctdb_recoverd.c +++ b/ctdb/server/ctdb_recoverd.c @@ -166,7 +166,7 @@ static enum monitor_result freeze_all_nodes(struct ctdb_context *ctdb, struct ct } state = ctdb_ctrl_freeze_send(ctdb, mem_ctx, CONTROL_TIMEOUT(), - nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn); if (state == NULL) { /* we failed to send the control, treat this as an error and try again next iteration @@ -222,16 +222,16 @@ static int set_recovery_mode(struct ctdb_context *ctdb, struct ctdb_node_map *no continue; } - ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, rec_mode); + ret = ctdb_ctrl_setrecmode(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, rec_mode); if (ret != 0) { - DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].vnn)); + DEBUG(0, (__location__ " Unable to set recmode on node %u\n", nodemap->nodes[j].pnn)); return -1; } if (rec_mode == CTDB_RECOVERY_NORMAL) { - ret = ctdb_ctrl_thaw(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn); + ret = ctdb_ctrl_thaw(ctdb, 
CONTROL_TIMEOUT(), nodemap->nodes[j].pnn); if (ret != 0) { - DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].vnn)); + DEBUG(0, (__location__ " Unable to thaw node %u\n", nodemap->nodes[j].pnn)); return -1; } } @@ -254,9 +254,9 @@ static int set_recovery_master(struct ctdb_context *ctdb, struct ctdb_node_map * continue; } - ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, vnn); + ret = ctdb_ctrl_setrecmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, vnn); if (ret != 0) { - DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].vnn)); + DEBUG(0, (__location__ " Unable to set recmaster on node %u\n", nodemap->nodes[j].pnn)); return -1; } } @@ -277,7 +277,7 @@ static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctd /* verify that all other nodes have all our databases */ for (j=0; jnum; j++) { /* we dont need to ourself ourselves */ - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } /* dont check nodes that are unavailable */ @@ -285,7 +285,7 @@ static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctd continue; } - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, &remote_dbmap); if (ret != 0) { DEBUG(0, (__location__ " Unable to get dbids from node %u\n", vnn)); @@ -312,7 +312,7 @@ static int create_missing_remote_databases(struct ctdb_context *ctdb, struct ctd DEBUG(0, (__location__ " Unable to get dbname from node %u\n", vnn)); return -1; } - ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, name); + ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, name); if (ret != 0) { DEBUG(0, (__location__ " Unable to create remote db:%s\n", name)); return -1; @@ -336,7 +336,7 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct 
ctdb /* verify that we have all database any other node has */ for (j=0; jnum; j++) { /* we dont need to ourself ourselves */ - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } /* dont check nodes that are unavailable */ @@ -344,7 +344,7 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb continue; } - ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ret = ctdb_ctrl_getdbmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, &remote_dbmap); if (ret != 0) { DEBUG(0, (__location__ " Unable to get dbids from node %u\n", vnn)); @@ -367,11 +367,11 @@ static int create_missing_local_databases(struct ctdb_context *ctdb, struct ctdb /* ok so we need to create this database and rebuild dbmap */ - ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ctdb_ctrl_getdbname(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, remote_dbmap->dbids[db], mem_ctx, &name); if (ret != 0) { DEBUG(0, (__location__ " Unable to get dbname from node %u\n", - nodemap->nodes[j].vnn)); + nodemap->nodes[j].pnn)); return -1; } ctdb_ctrl_createdb(ctdb, CONTROL_TIMEOUT(), vnn, mem_ctx, name); @@ -405,18 +405,18 @@ static int pull_all_remote_databases(struct ctdb_context *ctdb, struct ctdb_node for (i=0;inum;i++) { for (j=0; jnum; j++) { /* we dont need to merge with ourselves */ - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } /* dont merge from nodes that are unavailable */ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, vnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx); if (ret != 0) { DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n", - nodemap->nodes[j].vnn, vnn)); + nodemap->nodes[j].pnn, vnn)); return -1; } } @@ -441,9 +441,9 @@ static int 
update_dmaster_on_all_databases(struct ctdb_context *ctdb, struct ctd if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, ctdb, dbmap->dbids[i], vnn); + ret = ctdb_ctrl_setdmaster(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, ctdb, dbmap->dbids[i], vnn); if (ret != 0) { - DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].vnn, dbmap->dbids[i])); + DEBUG(0, (__location__ " Unable to set dmaster for node %u db:0x%08x\n", nodemap->nodes[j].pnn, dbmap->dbids[i])); return -1; } } @@ -463,7 +463,7 @@ static int update_flags_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_node struct ctdb_node_flag_change c; TDB_DATA data; - c.vnn = nodemap->nodes[i].vnn; + c.vnn = nodemap->nodes[i].pnn; c.old_flags = nodemap->nodes[i].flags; c.new_flags = nodemap->nodes[i].flags; @@ -496,11 +496,11 @@ static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) { continue; } - ret = ctdb_ctrl_set_rsn_nonempty(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn, + ret = ctdb_ctrl_set_rsn_nonempty(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, db_id, max_rsn+1); if (ret != 0) { DEBUG(0,(__location__ " Failed to set rsn on node %u to %llu\n", - nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1)); + nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1)); return -1; } } @@ -510,11 +510,11 @@ static int vacuum_db(struct ctdb_context *ctdb, uint32_t db_id, struct ctdb_node if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) { continue; } - ret = ctdb_ctrl_delete_low_rsn(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn, + ret = ctdb_ctrl_delete_low_rsn(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, db_id, max_rsn+1); if (ret != 0) { DEBUG(0,(__location__ " Failed to delete records on node %u with rsn below %llu\n", - nodemap->nodes[i].vnn, (unsigned long long)max_rsn+1)); + 
nodemap->nodes[i].pnn, (unsigned long long)max_rsn+1)); return -1; } } @@ -554,18 +554,18 @@ static int push_all_local_databases(struct ctdb_context *ctdb, struct ctdb_node_ for (i=0;inum;i++) { for (j=0; jnum; j++) { /* we dont need to push to ourselves */ - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } /* dont push to nodes that are unavailable */ if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), vnn, nodemap->nodes[j].vnn, + ret = ctdb_ctrl_copydb(ctdb, CONTROL_TIMEOUT(), vnn, nodemap->nodes[j].pnn, dbmap->dbids[i], CTDB_LMASTER_ANY, mem_ctx); if (ret != 0) { DEBUG(0, (__location__ " Unable to copy db from node %u to node %u\n", - vnn, nodemap->nodes[j].vnn)); + vnn, nodemap->nodes[j].pnn)); return -1; } } @@ -590,7 +590,7 @@ static int update_vnnmap_on_all_nodes(struct ctdb_context *ctdb, struct ctdb_nod continue; } - ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, mem_ctx, vnnmap); + ret = ctdb_ctrl_setvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, vnnmap); if (ret != 0) { DEBUG(0, (__location__ " Unable to set vnnmap for node %u\n", vnn)); return -1; @@ -843,7 +843,7 @@ static int do_recovery(struct ctdb_recoverd *rec, vnnmap->map = talloc_zero_array(vnnmap, uint32_t, vnnmap->size); for (i=j=0;inum;i++) { if (!(nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) { - vnnmap->map[j++] = nodemap->nodes[i].vnn; + vnnmap->map[j++] = nodemap->nodes[i].pnn; } } @@ -903,7 +903,7 @@ static int do_recovery(struct ctdb_recoverd *rec, /* if enabled, tell nodes to takeover their public IPs */ - if (ctdb->takeover.enabled) { + if (ctdb->vnn_list) { ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { DEBUG(0, (__location__ " Unable to setup public takeover addresses\n")); @@ -1053,7 +1053,7 @@ static void unban_all_nodes(struct ctdb_context *ctdb) for (i=0;inum;i++) { if ( (!(nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) 
&& (nodemap->nodes[i].flags & NODE_FLAGS_BANNED) ) { - ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].vnn, 0, NODE_FLAGS_BANNED); + ctdb_ctrl_modflags(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[i].pnn, 0, NODE_FLAGS_BANNED); } } @@ -1166,7 +1166,7 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, tmp_ctx, &nodemap); for (i=0;inum;i++) { - if (nodemap->nodes[i].vnn == c->vnn) break; + if (nodemap->nodes[i].pnn == c->vnn) break; } if (i == nodemap->num) { @@ -1203,7 +1203,7 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, if (ret == 0 && ctdb->recovery_master == ctdb->vnn && ctdb->recovery_mode == CTDB_RECOVERY_NORMAL && - ctdb->takeover.enabled) { + ctdb->vnn_list) { /* Only do the takeover run if the perm disabled or unhealthy flags changed since these will cause an ip failover but not a recovery. @@ -1284,7 +1284,7 @@ static enum monitor_result verify_recmode(struct ctdb_context *ctdb, struct ctdb } state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, CONTROL_TIMEOUT(), - nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn); if (state == NULL) { /* we failed to send the control, treat this as an error and try again next iteration @@ -1375,7 +1375,7 @@ static enum monitor_result verify_recmaster(struct ctdb_context *ctdb, struct ct } state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, CONTROL_TIMEOUT(), - nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn); if (state == NULL) { /* we failed to send the control, treat this as an error and try again next iteration @@ -1491,7 +1491,7 @@ again: /* count how many active nodes there are */ num_active = 0; for (i=0; inum; i++) { - if (rec->banned_nodes[nodemap->nodes[i].vnn] != NULL) { + if (rec->banned_nodes[nodemap->nodes[i].pnn] != NULL) { nodemap->nodes[i].flags |= NODE_FLAGS_BANNED; } else { nodemap->nodes[i].flags &= ~NODE_FLAGS_BANNED; @@ -1517,7 +1517,7 @@ again: /* verify that the recmaster node 
is still active */ for (j=0; jnum; j++) { - if (nodemap->nodes[j].vnn==recmaster) { + if (nodemap->nodes[j].pnn==recmaster) { break; } } @@ -1529,7 +1529,7 @@ again: } if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { - DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].vnn)); + DEBUG(0, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn)); force_election(rec, mem_ctx, vnn, nodemap); goto again; } @@ -1563,7 +1563,7 @@ again: */ switch (verify_recmode(ctdb, nodemap)) { case MONITOR_RECOVERY_NEEDED: - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); goto again; case MONITOR_FAILED: goto again; @@ -1582,15 +1582,15 @@ again: if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } - ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, &remote_nodemap); if (ret != 0) { DEBUG(0, (__location__ " Unable to get nodemap from remote node %u\n", - nodemap->nodes[j].vnn)); + nodemap->nodes[j].pnn)); goto again; } @@ -1599,8 +1599,8 @@ again: */ if (remote_nodemap->num != nodemap->num) { DEBUG(0, (__location__ " Remote node:%u has different node count. 
%u vs %u of the local node\n", - nodemap->nodes[j].vnn, remote_nodemap->num, nodemap->num)); - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn, remote_nodemap->num, nodemap->num)); + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); goto again; } @@ -1608,21 +1608,21 @@ again: active, then that is also a good reason to do recovery */ for (i=0;inum;i++) { - if (remote_nodemap->nodes[i].vnn != nodemap->nodes[i].vnn) { + if (remote_nodemap->nodes[i].pnn != nodemap->nodes[i].pnn) { DEBUG(0, (__location__ " Remote node:%u has different nodemap vnn for %d (%u vs %u).\n", - nodemap->nodes[j].vnn, i, - remote_nodemap->nodes[i].vnn, nodemap->nodes[i].vnn)); + nodemap->nodes[j].pnn, i, + remote_nodemap->nodes[i].pnn, nodemap->nodes[i].pnn)); do_recovery(rec, mem_ctx, vnn, num_active, nodemap, - vnnmap, nodemap->nodes[j].vnn); + vnnmap, nodemap->nodes[j].pnn); goto again; } if ((remote_nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) != (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE)) { DEBUG(0, (__location__ " Remote node:%u has different nodemap flag for %d (0x%x vs 0x%x)\n", - nodemap->nodes[j].vnn, i, + nodemap->nodes[j].pnn, i, remote_nodemap->nodes[i].flags, nodemap->nodes[i].flags)); do_recovery(rec, mem_ctx, vnn, num_active, nodemap, - vnnmap, nodemap->nodes[j].vnn); + vnnmap, nodemap->nodes[j].pnn); goto again; } } @@ -1656,19 +1656,19 @@ again: if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } for (i=0; isize; i++) { - if (vnnmap->map[i] == nodemap->nodes[j].vnn) { + if (vnnmap->map[i] == nodemap->nodes[j].pnn) { break; } } if (i == vnnmap->size) { DEBUG(0, (__location__ " Node %u is active in the nodemap but did not exist in the vnnmap\n", - nodemap->nodes[j].vnn)); - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + 
nodemap->nodes[j].pnn)); + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); goto again; } } @@ -1681,31 +1681,31 @@ again: if (nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { continue; } - if (nodemap->nodes[j].vnn == vnn) { + if (nodemap->nodes[j].pnn == vnn) { continue; } - ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].vnn, + ret = ctdb_ctrl_getvnnmap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, &remote_vnnmap); if (ret != 0) { DEBUG(0, (__location__ " Unable to get vnnmap from remote node %u\n", - nodemap->nodes[j].vnn)); + nodemap->nodes[j].pnn)); goto again; } /* verify the vnnmap generation is the same */ if (vnnmap->generation != remote_vnnmap->generation) { DEBUG(0, (__location__ " Remote node %u has different generation of vnnmap. %u vs %u (ours)\n", - nodemap->nodes[j].vnn, remote_vnnmap->generation, vnnmap->generation)); - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn, remote_vnnmap->generation, vnnmap->generation)); + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); goto again; } /* verify the vnnmap size is the same */ if (vnnmap->size != remote_vnnmap->size) { DEBUG(0, (__location__ " Remote node %u has different size of vnnmap. 
%u vs %u (ours)\n", - nodemap->nodes[j].vnn, remote_vnnmap->size, vnnmap->size)); - do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].vnn); + nodemap->nodes[j].pnn, remote_vnnmap->size, vnnmap->size)); + do_recovery(rec, mem_ctx, vnn, num_active, nodemap, vnnmap, nodemap->nodes[j].pnn); goto again; } @@ -1713,16 +1713,16 @@ again: for (i=0;isize;i++) { if (remote_vnnmap->map[i] != vnnmap->map[i]) { DEBUG(0, (__location__ " Remote node %u has different vnnmap.\n", - nodemap->nodes[j].vnn)); + nodemap->nodes[j].pnn)); do_recovery(rec, mem_ctx, vnn, num_active, nodemap, - vnnmap, nodemap->nodes[j].vnn); + vnnmap, nodemap->nodes[j].pnn); goto again; } } } /* we might need to change who has what IP assigned */ - if (need_takeover_run && ctdb->takeover.enabled) { + if (need_takeover_run && ctdb->vnn_list) { ret = ctdb_takeover_run(ctdb, nodemap); if (ret != 0) { DEBUG(0, (__location__ " Unable to setup public takeover addresses\n")); diff --git a/ctdb/server/ctdb_server.c b/ctdb/server/ctdb_server.c index 25e2d221160..240e71a257b 100644 --- a/ctdb/server/ctdb_server.c +++ b/ctdb/server/ctdb_server.c @@ -105,14 +105,14 @@ static int ctdb_add_node(struct ctdb_context *ctdb, char *nstr) node->address.address, node->address.port); /* this assumes that the nodes are kept in sorted order, and no gaps */ - node->vnn = ctdb->num_nodes; + node->pnn = ctdb->num_nodes; /* nodes start out disconnected */ node->flags |= NODE_FLAGS_DISCONNECTED; if (ctdb->address.address && ctdb_same_address(&ctdb->address, &node->address)) { - ctdb->vnn = node->vnn; + ctdb->vnn = node->pnn; node->flags &= ~NODE_FLAGS_DISCONNECTED; } @@ -401,7 +401,7 @@ static void ctdb_broadcast_packet_all(struct ctdb_context *ctdb, { int i; for (i=0;inum_nodes;i++) { - hdr->destnode = ctdb->nodes[i]->vnn; + hdr->destnode = ctdb->nodes[i]->pnn; ctdb_queue_packet(ctdb, hdr); } } @@ -428,7 +428,7 @@ static void ctdb_broadcast_packet_connected(struct ctdb_context *ctdb, int i; for 
(i=0;inum_nodes;i++) { if (!(ctdb->nodes[i]->flags & NODE_FLAGS_DISCONNECTED)) { - hdr->destnode = ctdb->nodes[i]->vnn; + hdr->destnode = ctdb->nodes[i]->pnn; ctdb_queue_packet(ctdb, hdr); } } diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 89787caf1be..d042ba75b3a 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -38,6 +38,7 @@ struct ctdb_takeover_arp { uint32_t count; struct sockaddr_in sin; struct ctdb_tcp_array *tcparray; + struct ctdb_vnn *vnn; }; @@ -72,7 +73,7 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * struct ctdb_tcp_array *tcparray; - ret = ctdb_sys_send_arp(&arp->sin, arp->ctdb->takeover.interface); + ret = ctdb_sys_send_arp(&arp->sin, arp->vnn->vnn_list->iface); if (ret != 0) { DEBUG(0,(__location__ " sending of arp failed (%s)\n", strerror(errno))); } @@ -107,7 +108,7 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * return; } - event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx, + event_add_timed(arp->ctdb->ev, arp->ctdb->takeover_ctx, timeval_current_ofs(CTDB_ARP_INTERVAL, 0), ctdb_control_send_arp, arp); } @@ -115,7 +116,7 @@ static void ctdb_control_send_arp(struct event_context *ev, struct timed_event * struct takeover_callback_state { struct ctdb_req_control *c; struct sockaddr_in *sin; - struct ctdb_node *node; + struct ctdb_vnn *vnn; }; /* @@ -134,34 +135,35 @@ static void takeover_ip_callback(struct ctdb_context *ctdb, int status, if (status != 0) { DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n", - ip, ctdb->takeover.interface)); + ip, state->vnn->vnn_list->iface)); ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL); talloc_free(state); return; } - if (!ctdb->takeover.last_ctx) { - ctdb->takeover.last_ctx = talloc_new(ctdb); - if (!ctdb->takeover.last_ctx) goto failed; + if (!ctdb->takeover_ctx) { + ctdb->takeover_ctx = talloc_new(ctdb); + if (!ctdb->takeover_ctx) goto 
failed; } - arp = talloc_zero(ctdb->takeover.last_ctx, struct ctdb_takeover_arp); + arp = talloc_zero(ctdb->takeover_ctx, struct ctdb_takeover_arp); if (!arp) goto failed; arp->ctdb = ctdb; arp->sin = *state->sin; + arp->vnn = state->vnn; - tcparray = state->node->tcp_array; + tcparray = state->vnn->tcp_array; if (tcparray) { /* add all of the known tcp connections for this IP to the list of tcp connections to send tickle acks for */ arp->tcparray = talloc_steal(arp, tcparray); - state->node->tcp_array = NULL; - state->node->tcp_update_needed = true; + state->vnn->tcp_array = NULL; + state->vnn->tcp_update_needed = true; } - event_add_timed(arp->ctdb->ev, arp->ctdb->takeover.last_ctx, + event_add_timed(arp->ctdb->ev, arp->ctdb->takeover_ctx, timeval_zero(), ctdb_control_send_arp, arp); /* the control succeeded */ @@ -179,19 +181,19 @@ failed: Find the vnn of the node that has a public ip address returns -1 if the address is not known as a public address */ -static int32_t find_public_ip_vnn(struct ctdb_context *ctdb, char *ip) +static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, char *ip) { - int32_t vnn = -1; - int i; + struct ctdb_vnn_list *vnn_list; + struct ctdb_vnn *vnn; - for (i=0;inum_nodes;i++) { - if (ctdb->nodes[i]->public_address && !strcmp(ip, ctdb->nodes[i]->public_address)) { - vnn = i; - break; + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (!strcmp(vnn->public_address, ip)) { + return vnn; + } } } - - return vnn; + return NULL; } @@ -207,10 +209,16 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, struct takeover_callback_state *state; struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; char *ip = inet_ntoa(pip->sin.sin_addr); - struct ctdb_node *node = ctdb->nodes[pip->vnn]; + struct ctdb_vnn *vnn; - /* update out node table */ - node->takeover_vnn = pip->takeover_vnn; + /* update our vnn list */ + /* look the address up first: vnn is unset until find_public_ip_vnn() returns */ + vnn = 
find_public_ip_vnn(ctdb, ip); + if (vnn == NULL) { + DEBUG(0,("takeoverip called for an ip '%s' that is not a public address\n", ip)); + return 0; + } + vnn->pnn = pip->pnn; /* if our kernel already has this IP, do nothing */ if (ctdb_sys_have_ip(ip)) { @@ -225,11 +233,11 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, CTDB_NO_MEMORY(ctdb, state->sin); *state->sin = pip->sin; - state->node = node; + state->vnn = vnn; DEBUG(0,("Takover of IP %s/%u on interface %s\n", - ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, - ctdb->takeover.interface)); + ip, vnn->public_netmask_bits, + vnn->vnn_list->iface)); ctdb_stop_monitoring(ctdb); @@ -237,12 +245,12 @@ int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), state, takeover_ip_callback, state, "takeip %s %s %u", - ctdb->takeover.interface, + vnn->vnn_list->iface, ip, - ctdb->nodes[ctdb->vnn]->public_netmask_bits); + vnn->public_netmask_bits); if (ret != 0) { DEBUG(0,(__location__ " Failed to takeover IP %s on interface %s\n", - ip, ctdb->takeover.interface)); + ip, vnn->vnn_list->iface)); talloc_free(state); return -1; } @@ -316,22 +324,27 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, struct takeover_callback_state *state; struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr; char *ip = inet_ntoa(pip->sin.sin_addr); - struct ctdb_node *node = ctdb->nodes[pip->vnn]; + struct ctdb_vnn *vnn; - /* update out node table */ - ctdb->nodes[pip->vnn]->takeover_vnn = pip->takeover_vnn; + /* update our vnn list */ + vnn = find_public_ip_vnn(ctdb, ip); + if (vnn == NULL) { + DEBUG(0,("releaseip called for an ip '%s' that is not a public address\n", ip)); + return 0; + } + vnn->pnn = pip->pnn; if (!ctdb_sys_have_ip(ip)) { return 0; } DEBUG(0,("Release of IP %s/%u on interface %s\n", - ip, ctdb->nodes[ctdb->vnn]->public_netmask_bits, - ctdb->takeover.interface)); + ip, vnn->public_netmask_bits, + vnn->vnn_list->iface)); /* stop 
any previous arps */ - talloc_free(ctdb->takeover.last_ctx); - ctdb->takeover.last_ctx = NULL; + talloc_free(ctdb->takeover_ctx); + ctdb->takeover_ctx = NULL; state = talloc(ctdb, struct takeover_callback_state); CTDB_NO_MEMORY(ctdb, state); @@ -341,7 +354,7 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, CTDB_NO_MEMORY(ctdb, state->sin); *state->sin = pip->sin; - state->node = node; + state->vnn = vnn; ctdb_stop_monitoring(ctdb); @@ -349,12 +362,12 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, timeval_current_ofs(ctdb->tunable.script_timeout, 0), state, release_ip_callback, state, "releaseip %s %s %u", - ctdb->takeover.interface, + vnn->vnn_list->iface, ip, - ctdb->nodes[ctdb->vnn]->public_netmask_bits); + vnn->public_netmask_bits); if (ret != 0) { DEBUG(0,(__location__ " Failed to release IP %s on interface %s\n", - ip, ctdb->takeover.interface)); + ip, vnn->vnn_list->iface)); talloc_free(state); return -1; } @@ -366,18 +379,75 @@ int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, } + +static int add_public_address(struct ctdb_context *ctdb, int ip0, int ip1, int ip2, int ip3, int nm, char *iface) +{ + struct ctdb_vnn *vnn; + struct ctdb_vnn_list *vnn_list; + const char *public_address; + + /* first find the entry for this interface if we have one */ + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + if (!strcmp(iface, vnn_list->iface)) { + break; + } + } + + /* If we dont have a vnn_list for this interface, we must create one */ + if (vnn_list == NULL) { + vnn_list = talloc_zero(ctdb, struct ctdb_vnn_list); + CTDB_NO_MEMORY_FATAL(ctdb, vnn_list); + vnn_list->iface = talloc_strdup(vnn_list, iface); + vnn_list->next = ctdb->vnn_list; + ctdb->vnn_list = vnn_list; + } + + + /* Verify that we dont have an entry for this ip yet */ + public_address = talloc_asprintf(vnn_list, "%d.%d.%d.%d", ip0, ip1, ip2, ip3); + CTDB_NO_MEMORY_FATAL(ctdb, public_address); + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if 
(!strcmp(public_address, vnn->public_address)) { + DEBUG(0,("Same ip '%s' specified multiple times in the public address list \n", public_address)); + exit(1); + } + } + + + /* make sure the netmask is ok; nm is parsed with %d so it may be negative */ + if (nm < 0 || nm > 32) { + DEBUG(0, ("Illegal netmask for IP %s\n", public_address)); + return -1; + } + + /* create a new vnn structure for this ip address */ + vnn = talloc_zero(vnn_list, struct ctdb_vnn); + CTDB_NO_MEMORY_FATAL(ctdb, vnn); + vnn->vnn_list = vnn_list; + vnn->public_address = talloc_steal(vnn, public_address); + vnn->public_netmask_bits = nm; + vnn->pnn = -1; + + vnn->next = vnn_list->vnn; + vnn_list->vnn = vnn; + + vnn_list->num_ips++; + return 0; +} + + /* setup the event script directory */ int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir) { - ctdb->takeover.event_script_dir = talloc_strdup(ctdb, script_dir); - CTDB_NO_MEMORY(ctdb, ctdb->takeover.event_script_dir); + ctdb->event_script_dir = talloc_strdup(ctdb, script_dir); + CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir); return 0; } /* - setup the public address list from a file + setup the public address lists from a file */ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist) { @@ -394,37 +464,19 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist) nlines--; } - if (nlines != ctdb->num_nodes) { - DEBUG(0,("Number of lines in %s does not match number of nodes!\n", alist)); - talloc_free(lines); - return -1; - } - for (i=0;inodes[i]->public_address = talloc_strdup(ctdb->nodes[i], lines[i]); - CTDB_NO_MEMORY(ctdb, ctdb->nodes[i]->public_address); - ctdb->nodes[i]->takeover_vnn = -1; - - /* see if they supplied a netmask length */ - p = strchr(ctdb->nodes[i]->public_address, '/'); - if (!p) { - DEBUG(0,("You must supply a netmask for public address %s\n", - ctdb->nodes[i]->public_address)); - return -1; - } - *p = 0; - ctdb->nodes[i]->public_netmask_bits = atoi(p+1); - - if (ctdb->nodes[i]->public_netmask_bits > 32) { - 
DEBUG(0, ("Illegal netmask for IP %s\n", ctdb->nodes[i]->public_address)); + if (sscanf(lines[i], "%d.%d.%d.%d/%d %255s", &ip0, &ip1, &ip2, &ip3, &nm, iface) != 6) { + DEBUG(0,("Badly formed line '%s' in public address list\n", lines[i])); + talloc_free(lines); return -1; } - if (inet_aton(ctdb->nodes[i]->public_address, &in) == 0) { - DEBUG(0,("Badly formed IP '%s' in public address list\n", ctdb->nodes[i]->public_address)); + if (add_public_address(ctdb, ip0, ip1, ip2, ip3, nm, iface)) { + DEBUG(0,("Failed to add '%s' to the public address list\n", lines[i])); + talloc_free(lines); return -1; } } @@ -433,131 +485,187 @@ int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist) return 0; } -/* - see if two IPs are on the same subnet - */ -static bool ctdb_same_subnet(const char *ip1, const char *ip2, uint8_t netmask_bits) + +/* Given a physical node and an interface, return the number of + public addresses that is currently assigned to this node/interface. +*/ +static int node_ip_coverage(struct ctdb_context *ctdb, + int32_t pnn, struct ctdb_vnn_list *vnn_list) { - struct in_addr in1, in2; - uint32_t mask; + int num=0; + struct ctdb_vnn *vnn; - inet_aton(ip1, &in1); - inet_aton(ip2, &in2); - - mask = ~((1LL<<(32-netmask_bits))-1); - - if ((ntohl(in1.s_addr) & mask) != (ntohl(in2.s_addr) & mask)) { - return false; - } - - return true; -} - - -/* - try to find an available node to take a given nodes IP that meets the - criterion given by the flags - */ -static void ctdb_takeover_find_node(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap, - int node, uint32_t mask_flags) -{ - static int start_node=0; - int j; - - /* If we add facilities to add/remove nodes to a cluster at runtime - we must make sure that start_node is suddently not beyond the - end of the nodelist - */ - if (start_node >= nodemap->num) { - start_node = 0; - } - - j=start_node; - while (1) { - if (!(nodemap->nodes[j].flags & mask_flags) && - 
ctdb_same_subnet(ctdb->nodes[j]->public_address, - ctdb->nodes[node]->public_address, - ctdb->nodes[j]->public_netmask_bits)) { - ctdb->nodes[node]->takeover_vnn = nodemap->nodes[j].vnn; - /* We found a node to take over - also update the startnode so that we start at a - different node next time we are called. - */ - start_node = (j+1)%nodemap->num;; - return; - } - - /* Try the next node */ - j=(j+1)%nodemap->num; - - /* We tried all the nodes and got back to where we started, - there is no node that can take over - */ - if (j == start_node) { - break; + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == pnn) { + num++; } } - - /* No takeover node found */ - return; + return num; } +/* search the vnn list for a node to takeover vnn. + pick the node that currently are serving the least number of vnns for + this interface so that the vnns get spread out evenly. +*/ +static int find_takeover_node(struct ctdb_context *ctdb, + struct ctdb_node_map *nodemap, uint32_t mask, + struct ctdb_vnn_list *vnn_list, struct ctdb_vnn *vnn) +{ + int pnn, min, num; + int i; + + pnn = -1; + for (i=0;inum;i++) { + if (nodemap->nodes[i].flags & mask) { + /* This node is not healty and can not be used to serve + a public address + */ + continue; + } + + num = node_ip_coverage(ctdb, i, vnn_list); + /* was this the first node we checked ? 
*/ + if (pnn == -1) { + pnn = i; + min = num; + } else { + if (num < min) { + pnn = i; + min = num; + } + } + } + if (pnn == -1) { + DEBUG(0,(__location__ " Could not find node to take over public address '%s'\n", vnn->public_address)); + return -1; + } + + vnn->pnn = pnn; + return 0; +} /* make any IP alias changes for public addresses that are necessary */ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) { - int i, j; + int i, num_healthy; int ret; struct ctdb_public_ip ip; + uint32_t mask; + struct ctdb_vnn_list *vnn_list; + struct ctdb_vnn *vnn; + int maxnode, maxnum, minnode, minnum, num; ZERO_STRUCT(ip); - /* Work out which node will look after each public IP. - * takeover_node cycles over the nodes and is incremented each time a - * node has been assigned to take over for another node. - * This spreads the failed nodes out across the remaining - * nodes more evenly - */ + /* Count how many completely healthy nodes we have */ + num_healthy = 0; for (i=0;inum;i++) { if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) { - ctdb->nodes[i]->takeover_vnn = nodemap->nodes[i].vnn; - } else { - uint32_t takeover_vnn; + num_healthy++; + } + } - /* If this public address has already been taken over - by a node and that node is still healthy, then - leave the public address at that node. 
- */ - takeover_vnn = ctdb->nodes[i]->takeover_vnn; - if ( ctdb_validate_vnn(ctdb, takeover_vnn) - && (!(nodemap->nodes[takeover_vnn].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) ) { + if (num_healthy > 0) { + /* We have healthy nodes, so only consider them for + serving public addresses + */ + mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED; + } else { + /* We didnt have any completely healthy nodes so + use "disabled" nodes as a fallback + */ + mask = NODE_FLAGS_INACTIVE; + } + + + /* mark all public addresses with a masked node as being served by + node -1 + */ + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == -1) { continue; } - - - ctdb->nodes[i]->takeover_vnn = (uint32_t)-1; - - ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED); - - /* if no enabled node can take it, then we - might as well use any active node. It - probably means that some subsystem (such as - NFS) is sick on all nodes. Best we can do - is to keep the other services up. */ - if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) { - ctdb_takeover_find_node(ctdb, nodemap, i, NODE_FLAGS_INACTIVE); - } - - if (ctdb->nodes[i]->takeover_vnn == (uint32_t)-1) { - DEBUG(0,(__location__ " No node available on same network to take %s\n", - ctdb->nodes[i]->public_address)); + if (nodemap->nodes[vnn->pnn].flags & mask) { + vnn->pnn = -1; } } - } + } - /* at this point ctdb->nodes[i]->takeover_vnn is the vnn which will own each IP */ + + /* now we must redistribute all public addresses with takeover node + -1 among the nodes available + */ + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { +try_again: + /* loop over all vnn's and find a physical node to cover for + each unassigned vnn. 
+ */ + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == -1) { + if (find_takeover_node(ctdb, nodemap, mask, vnn_list, vnn)) { + DEBUG(0,("Failed to find node to cover ip %s\n", vnn->public_address)); + return -1; + } + } + } + + /* Get the highest and lowes number of vnn's a valid node + covers for this interface + */ + maxnode = -1; + minnode = -1; + for (i=0;inum;i++) { + if (nodemap->nodes[i].flags & mask) { + continue; + } + num = node_ip_coverage(ctdb, i, vnn_list); + if (maxnode == -1) { + maxnode = i; + maxnum = num; + } else { + if (num > maxnum) { + maxnode = i; + maxnum = num; + } + } + if (minnode == -1) { + minnode = i; + minnum = num; + } else { + if (num < minnum) { + minnode = i; + minnum = num; + } + } + } + if (maxnode == -1) { + DEBUG(0,(__location__ " Could not find maxnode\n")); + return -1; + } + + /* if the spread between the smallest and largest coverage by + a node is >=2 we steal one of the ips from the node with the + most coverage to even things out a bit + */ + if (maxnum > minnum+1) { + /* mark one of maxnode's vnn's as unassigned and try + again + */ + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == maxnode) { + vnn->pnn = -1; + goto try_again; + } + } + } + } + + + + /* at this point ->pnn is the node which will own each IP */ /* now tell all nodes to delete any alias that they should not have. 
This will be a NOOP on nodes that don't currently @@ -568,21 +676,26 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) continue; } - /* tell this node to delete all of the aliases that it should not have */ - for (j=0;jnum;j++) { - if (ctdb->nodes[j]->takeover_vnn != nodemap->nodes[i].vnn) { - ip.vnn = j; - ip.takeover_vnn = ctdb->nodes[j]->takeover_vnn; + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == nodemap->nodes[i].pnn) { + /* This node should be serving this + vnn so dont tell it to release the ip + */ + continue; + } + + ip.pnn = vnn->pnn; ip.sin.sin_family = AF_INET; - inet_aton(ctdb->nodes[j]->public_address, &ip.sin.sin_addr); + inet_aton(vnn->public_address, &ip.sin.sin_addr); ret = ctdb_ctrl_release_ip(ctdb, TAKEOVER_TIMEOUT(), - nodemap->nodes[i].vnn, + nodemap->nodes[i].pnn, &ip); if (ret != 0) { DEBUG(0,("Failed to tell vnn %u to release IP %s\n", - nodemap->nodes[i].vnn, - ctdb->nodes[j]->public_address)); + nodemap->nodes[i].pnn, + vnn->public_address)); return -1; } } @@ -590,24 +703,25 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) } /* tell all nodes to get their own IPs */ - for (i=0;inum;i++) { - if (ctdb->nodes[i]->takeover_vnn == -1) { - /* this IP won't be taken over */ - continue; - } - ip.vnn = i; - ip.takeover_vnn = ctdb->nodes[i]->takeover_vnn; - ip.sin.sin_family = AF_INET; - inet_aton(ctdb->nodes[i]->public_address, &ip.sin.sin_addr); + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (vnn->pnn == -1) { + /* this IP won't be taken over */ + continue; + } + ip.pnn = vnn->pnn; + ip.sin.sin_family = AF_INET; + inet_aton(vnn->public_address, &ip.sin.sin_addr); - ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), - ctdb->nodes[i]->takeover_vnn, + ret = ctdb_ctrl_takeover_ip(ctdb, TAKEOVER_TIMEOUT(), + vnn->pnn, &ip); - if 
(ret != 0) { - DEBUG(0,("Failed asking vnn %u to take over IP %s\n", - ctdb->nodes[i]->takeover_vnn, - ctdb->nodes[i]->public_address)); - return -1; + if (ret != 0) { + DEBUG(0,("Failed asking vnn %u to take over IP %s\n", + vnn->pnn, + vnn->public_address)); + return -1; + } } } @@ -639,12 +753,12 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA data; struct ctdb_client_ip *ip; char *addr; - int32_t takeover_vnn; + struct ctdb_vnn *vnn; addr = inet_ntoa(p->dest.sin_addr); - takeover_vnn = find_public_ip_vnn(ctdb, addr); - if (takeover_vnn == -1) { + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { DEBUG(3,("Could not add client IP %s. This is not a public address.\n", addr)); return 0; } @@ -666,7 +780,6 @@ int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id, DLIST_ADD(client->tcp_list, tcp); - t.vnn = takeover_vnn; t.src = p->src; t.dest = p->dest; @@ -731,8 +844,18 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr; struct ctdb_tcp_array *tcparray; struct ctdb_tcp_connection tcp; + struct ctdb_vnn *vnn; + char *addr; - tcparray = ctdb->nodes[p->vnn]->tcp_array; + addr = inet_ntoa(p->dest.sin_addr); + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(0,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n", addr)); + return-1; + } + + + tcparray = vnn->tcp_array; /* If this is the first tickle */ if (tcparray == NULL) { @@ -740,7 +863,7 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) offsetof(struct ctdb_tcp_array, connections) + sizeof(struct ctdb_tcp_connection) * 1); CTDB_NO_MEMORY(ctdb, tcparray); - ctdb->nodes[p->vnn]->tcp_array = tcparray; + vnn->tcp_array = tcparray; tcparray->num = 0; tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection)); @@ -756,11 +879,11 @@ int32_t 
ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) /* Do we already have this tickle ?*/ tcp.saddr = p->src; tcp.daddr = p->dest; - if (ctdb_tcp_find(ctdb->nodes[p->vnn]->tcp_array, &tcp) != NULL) { - DEBUG(4,("Already had tickle info for %s:%u from vnn %u\n", + if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) { + DEBUG(4,("Already had tickle info for %s:%u for vnn:%u\n", inet_ntoa(tcp.daddr.sin_addr), ntohs(tcp.daddr.sin_port), - p->vnn)); + vnn->pnn)); return 0; } @@ -770,7 +893,7 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) tcparray->num+1); CTDB_NO_MEMORY(ctdb, tcparray->connections); - ctdb->nodes[p->vnn]->tcp_array = tcparray; + vnn->tcp_array = tcparray; tcparray->connections[tcparray->num].saddr = p->src; tcparray->connections[tcparray->num].daddr = p->dest; tcparray->num++; @@ -778,7 +901,7 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) DEBUG(2,("Added tickle info for %s:%u from vnn %u\n", inet_ntoa(tcp.daddr.sin_addr), ntohs(tcp.daddr.sin_port), - p->vnn)); + vnn->pnn)); return 0; } @@ -792,20 +915,17 @@ int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata) static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn) { struct ctdb_tcp_connection *tcpp; - int32_t vnn = find_public_ip_vnn(ctdb, inet_ntoa(conn->daddr.sin_addr)); - struct ctdb_node *node; + struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, inet_ntoa(conn->daddr.sin_addr)); - if (vnn == -1) { + if (vnn == NULL) { DEBUG(0,(__location__ " unable to find public address %s\n", inet_ntoa(conn->daddr.sin_addr))); return; } - node = ctdb->nodes[vnn]; - /* if the array is empty we cant remove it and we dont need to do anything */ - if (node->tcp_array == NULL) { + if (vnn->tcp_array == NULL) { DEBUG(2,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n", inet_ntoa(conn->daddr.sin_addr), ntohs(conn->daddr.sin_port))); @@ -816,7 +936,7 @@ static void 
ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc /* See if we know this connection if we dont know this connection then we dont need to do anything */ - tcpp = ctdb_tcp_find(node->tcp_array, conn); + tcpp = ctdb_tcp_find(vnn->tcp_array, conn); if (tcpp == NULL) { DEBUG(2,("Trying to remove tickle that doesnt exist %s:%u\n", inet_ntoa(conn->daddr.sin_addr), @@ -831,17 +951,17 @@ static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tc to the entry that is to be removed and just shring the ->num field */ - *tcpp = node->tcp_array->connections[node->tcp_array->num - 1]; - node->tcp_array->num--; + *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1]; + vnn->tcp_array->num--; /* If we deleted the last entry we also need to remove the entire array */ - if (node->tcp_array->num == 0) { - talloc_free(node->tcp_array); - node->tcp_array = NULL; + if (vnn->tcp_array->num == 0) { + talloc_free(vnn->tcp_array); + vnn->tcp_array = NULL; } - node->tcp_update_needed = true; + vnn->tcp_update_needed = true; DEBUG(2,("Removed tickle info for %s:%u\n", inet_ntoa(conn->saddr.sin_addr), @@ -879,22 +999,20 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) */ void ctdb_release_all_ips(struct ctdb_context *ctdb) { - int i; + struct ctdb_vnn_list *vnn_list; + struct ctdb_vnn *vnn; - if (!ctdb->takeover.enabled) { - return; - } - - for (i=0;inum_nodes;i++) { - struct ctdb_node *node = ctdb->nodes[i]; - if (ctdb_sys_have_ip(node->public_address)) { - struct in_addr in; - ctdb_event_script(ctdb, "releaseip %s %s %u", - ctdb->takeover.interface, - node->public_address, - node->public_netmask_bits); - if (inet_aton(node->public_address, &in) != 0) { - release_kill_clients(ctdb, in); + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + if (ctdb_sys_have_ip(vnn->public_address)) { + struct in_addr in; + ctdb_event_script(ctdb, "releaseip %s %s %u", + 
vnn_list->iface, + vnn->public_address, + vnn->public_netmask_bits); + if (inet_aton(vnn->public_address, &in) != 0) { + release_kill_clients(ctdb, in); + } } } } @@ -907,26 +1025,36 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA *outdata) { - int i, len; + int i, num, len; struct ctdb_all_public_ips *ips; + struct ctdb_vnn_list *vnn_list; + struct ctdb_vnn *vnn; + + /* count how many public ip structures we have */ + num = 0; + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + num++; + } + } len = offsetof(struct ctdb_all_public_ips, ips) + - ctdb->num_nodes*sizeof(struct ctdb_public_ip); - + num*sizeof(struct ctdb_public_ip); ips = talloc_zero_size(outdata, len); CTDB_NO_MEMORY(ctdb, ips); outdata->dsize = len; outdata->dptr = (uint8_t *)ips; - ips->num = ctdb->num_nodes; - for(i=0;inum_nodes;i++){ - ips->ips[i].vnn = i; - ips->ips[i].takeover_vnn = ctdb->nodes[i]->takeover_vnn; - ips->ips[i].sin.sin_family = AF_INET; - if (ctdb->nodes[i]->public_address) { - inet_aton(ctdb->nodes[i]->public_address, + ips->num = num; + i = 0; + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + ips->ips[i].pnn = vnn->pnn; + ips->ips[i].sin.sin_family = AF_INET; + inet_aton(vnn->public_address, &ips->ips[i].sin.sin_addr); + i++; } } @@ -940,6 +1068,7 @@ int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, that the ctdb daemon is to kill */ struct ctdb_kill_tcp { + struct ctdb_vnn_list *vnn_list; struct ctdb_context *ctdb; int capture_fd; int sending_fd; @@ -1079,7 +1208,7 @@ static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp) close(killtcp->sending_fd); killtcp->sending_fd = -1; } - killtcp->ctdb->killtcp = NULL; + killtcp->vnn_list->killtcp = NULL; return 0; } @@ -1102,8 +1231,22 @@ static void *add_killtcp_callback(void 
*parm, void *data) static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, struct sockaddr_in *src, struct sockaddr_in *dst) { - struct ctdb_kill_tcp *killtcp = ctdb->killtcp; + struct ctdb_kill_tcp *killtcp; struct ctdb_killtcp_con *con; + char *addr; + struct ctdb_vnn *vnn; + struct ctdb_vnn_list *vnn_list; + + addr = inet_ntoa(dst->sin_addr); + + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(0,(__location__ " Could not killtcp, '%s' is not a public address\n", addr)); + return 0; + } + + vnn_list = vnn->vnn_list; + killtcp = vnn_list->killtcp; /* If this is the first connection to kill we must allocate a new structure @@ -1112,12 +1255,13 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp); CTDB_NO_MEMORY(ctdb, killtcp); + killtcp->vnn_list = vnn_list; killtcp->ctdb = ctdb; killtcp->capture_fd = -1; killtcp->sending_fd = -1; killtcp->connections= trbt_create(killtcp, 0); - ctdb->killtcp = killtcp; + vnn_list->killtcp = killtcp; talloc_set_destructor(killtcp, ctdb_killtcp_destructor); } @@ -1153,7 +1297,7 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, If we dont have a socket to listen on yet we must create it */ if (killtcp->capture_fd == -1) { - killtcp->capture_fd = ctdb_sys_open_capture_socket(ctdb->takeover.interface, &killtcp->private_data); + killtcp->capture_fd = ctdb_sys_open_capture_socket(vnn_list->iface, &killtcp->private_data); if (killtcp->capture_fd == -1) { DEBUG(0,(__location__ " Failed to open capturing socket for killtcp\n")); goto failed; @@ -1179,8 +1323,8 @@ static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, return 0; failed: - talloc_free(ctdb->killtcp); - ctdb->killtcp = NULL; + talloc_free(vnn_list->killtcp); + vnn_list->killtcp = NULL; return -1; } @@ -1207,6 +1351,8 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind { struct ctdb_control_tcp_tickle_list *list = (struct 
ctdb_control_tcp_tickle_list *)indata.dptr; struct ctdb_tcp_array *tcparray; + char *addr; + struct ctdb_vnn *vnn; /* We must at least have tickles.num or else we cant verify the size of the received data blob @@ -1226,16 +1372,17 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind return -1; } - /* Make sure the vnn looks sane */ - if (!ctdb_validate_vnn(ctdb, list->vnn)) { - DEBUG(0,("Bad indata in ctdb_control_set_tcp_tickle_list. Invalid vnn: %u\n", list->vnn)); - return -1; + addr = inet_ntoa(list->ip.sin_addr); + + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(0,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", addr)); + return 1; } - /* remove any old ticklelist we might have */ - talloc_free(ctdb->nodes[list->vnn]->tcp_array); - ctdb->nodes[list->vnn]->tcp_array = NULL; + talloc_free(vnn->tcp_array); + vnn->tcp_array = NULL; tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array); CTDB_NO_MEMORY(ctdb, tcparray); @@ -1249,7 +1396,7 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind sizeof(struct ctdb_tcp_connection)*tcparray->num); /* We now have a new fresh tickle list array for this vnn */ - ctdb->nodes[list->vnn]->tcp_array = tcparray; + vnn->tcp_array = talloc_steal(vnn, tcparray); return 0; } @@ -1260,19 +1407,23 @@ int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind */ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata) { - uint32_t vnn = *(uint32_t *)indata.dptr; + struct sockaddr_in *ip = (struct sockaddr_in *)indata.dptr; struct ctdb_control_tcp_tickle_list *list; struct ctdb_tcp_array *tcparray; int num; + char *addr; + struct ctdb_vnn *vnn; - /* Make sure the vnn looks sane */ - if (!ctdb_validate_vnn(ctdb, vnn)) { - DEBUG(0,("Bad indata in ctdb_control_get_tcp_tickle_list. 
Invalid vnn: %u\n", vnn)); - return -1; + addr = inet_ntoa(ip->sin_addr); + + vnn = find_public_ip_vnn(ctdb, addr); + if (vnn == NULL) { + DEBUG(0,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", addr)); + return 1; } - tcparray = ctdb->nodes[vnn]->tcp_array; + tcparray = vnn->tcp_array; if (tcparray) { num = tcparray->num; } else { @@ -1287,7 +1438,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind CTDB_NO_MEMORY(ctdb, outdata->dptr); list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr; - list->vnn = vnn; + list->ip = *ip; list->tickles.num = num; if (num) { memcpy(&list->tickles.connections[0], tcparray->connections, @@ -1303,7 +1454,7 @@ int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA ind */ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, - uint32_t vnn, + struct sockaddr_in *ip, struct ctdb_tcp_array *tcparray) { int ret, num; @@ -1323,7 +1474,7 @@ static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, CTDB_NO_MEMORY(ctdb, data.dptr); list = (struct ctdb_control_tcp_tickle_list *)data.dptr; - list->vnn = vnn; + list->ip = *ip; list->tickles.num = num; if (tcparray) { memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num); @@ -1351,29 +1502,34 @@ static void ctdb_update_tcp_tickles(struct event_context *ev, struct timeval t, void *private_data) { struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); - int i, ret; + int ret; + struct ctdb_vnn_list *vnn_list; + struct ctdb_vnn *vnn; - for (i=0;inum_nodes;i++) { - struct ctdb_node *node = ctdb->nodes[i]; + for (vnn_list=ctdb->vnn_list;vnn_list;vnn_list=vnn_list->next) { + struct sockaddr_in ip; - /* we only send out updates for public addresses that we - have taken over - */ - if (ctdb->vnn != node->takeover_vnn) { - continue; - } - /* We only send out the updates if we need 
to */ - if (!node->tcp_update_needed) { - continue; - } + for (vnn=vnn_list->vnn;vnn;vnn=vnn->next) { + /* we only send out updates for public addresses that + we have taken over + */ + if (ctdb->vnn != vnn->pnn) { + continue; + } + /* We only send out the updates if we need to */ + if (!vnn->tcp_update_needed) { + continue; + } + inet_aton(vnn->public_address, &ip.sin_addr); + ret = ctdb_ctrl_set_tcp_tickles(ctdb, + TAKEOVER_TIMEOUT(), + CTDB_BROADCAST_CONNECTED, + &ip, + vnn->tcp_array); - ret = ctdb_ctrl_set_tcp_tickles(ctdb, - TAKEOVER_TIMEOUT(), - CTDB_BROADCAST_CONNECTED, - node->takeover_vnn, - node->tcp_array); - if (ret != 0) { - DEBUG(0,("Failed to send the tickle update for public address %s\n", node->public_address)); + if (ret != 0) { + DEBUG(0,("Failed to send the tickle update for public address %s\n", vnn->public_address)); + } } } diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c index b4170063910..9c9cc394251 100644 --- a/ctdb/server/ctdbd.c +++ b/ctdb/server/ctdbd.c @@ -42,7 +42,6 @@ static struct { const char *transport; const char *myaddress; const char *public_address_list; - const char *public_interface; const char *event_script_dir; const char *logfile; const char *recovery_lock_file; @@ -101,7 +100,6 @@ int main(int argc, const char *argv[]) POPT_CTDB_CMDLINE { "interactive", 'i', POPT_ARG_NONE, &interactive, 0, "don't fork", NULL }, { "public-addresses", 0, POPT_ARG_STRING, &options.public_address_list, 0, "public address list file", "filename" }, - { "public-interface", 0, POPT_ARG_STRING, &options.public_interface, 0, "public interface", "interface"}, { "event-script-dir", 0, POPT_ARG_STRING, &options.event_script_dir, 0, "event script directory", "dirname" }, { "logfile", 0, POPT_ARG_STRING, &options.logfile, 0, "log file location", "filename" }, { "nlist", 0, POPT_ARG_STRING, &options.nlist, 0, "node list file", "filename" }, @@ -200,18 +198,12 @@ int main(int argc, const char *argv[]) } } - if (options.public_interface) { - 
ctdb->takeover.interface = talloc_strdup(ctdb, options.public_interface); - CTDB_NO_MEMORY(ctdb, ctdb->takeover.interface); - } - if (options.public_address_list) { ret = ctdb_set_public_addresses(ctdb, options.public_address_list); if (ret == -1) { DEBUG(0,("Unable to setup public address list\n")); exit(1); } - ctdb->takeover.enabled = true; } ret = ctdb_set_event_script_dir(ctdb, options.event_script_dir); diff --git a/ctdb/server/eventscript.c b/ctdb/server/eventscript.c index c7339a355a6..d82d9d7f7cd 100644 --- a/ctdb/server/eventscript.c +++ b/ctdb/server/eventscript.c @@ -75,9 +75,9 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_li /* the service specific event scripts */ - if (stat(ctdb->takeover.event_script_dir, &st) != 0 && + if (stat(ctdb->event_script_dir, &st) != 0 && errno == ENOENT) { - DEBUG(0,("No event script directory found at '%s'\n", ctdb->takeover.event_script_dir)); + DEBUG(0,("No event script directory found at '%s'\n", ctdb->event_script_dir)); talloc_free(tmp_ctx); return -1; } @@ -88,9 +88,9 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_li /* scan all directory entries and insert all valid scripts into the tree */ - dir = opendir(ctdb->takeover.event_script_dir); + dir = opendir(ctdb->event_script_dir); if (dir == NULL) { - DEBUG(0,("Failed to open event script directory '%s'\n", ctdb->takeover.event_script_dir)); + DEBUG(0,("Failed to open event script directory '%s'\n", ctdb->event_script_dir)); talloc_free(tmp_ctx); return -1; } @@ -120,7 +120,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_li } /* Make sure the event script is executable */ - str = talloc_asprintf(tree, "%s/%s", ctdb->takeover.event_script_dir, de->d_name); + str = talloc_asprintf(tree, "%s/%s", ctdb->event_script_dir, de->d_name); if (stat(str, &st) != 0) { DEBUG(0,("Could not stat event script %s. 
Ignoring this event script\n", str)); continue; @@ -152,7 +152,7 @@ static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *fmt, va_li CTDB_NO_MEMORY(ctdb, options); cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s", - ctdb->takeover.event_script_dir, + ctdb->event_script_dir, script, options); CTDB_NO_MEMORY(ctdb, cmdstr); diff --git a/ctdb/tcp/tcp_connect.c b/ctdb/tcp/tcp_connect.c index 2f828e57174..0b0f35abb13 100644 --- a/ctdb/tcp/tcp_connect.c +++ b/ctdb/tcp/tcp_connect.c @@ -286,7 +286,7 @@ static int ctdb_tcp_listen_automatic(struct ctdb_context *ctdb) ctdb->name = talloc_asprintf(ctdb, "%s:%u", ctdb->address.address, ctdb->address.port); - ctdb->vnn = ctdb->nodes[i]->vnn; + ctdb->vnn = ctdb->nodes[i]->pnn; ctdb->nodes[i]->flags &= ~NODE_FLAGS_DISCONNECTED; DEBUG(1,("ctdb chose network address %s:%u vnn %u\n", ctdb->address.address, diff --git a/ctdb/tools/ctdb.c b/ctdb/tools/ctdb.c index 3e56a97f6e9..be6b81e3922 100644 --- a/ctdb/tools/ctdb.c +++ b/ctdb/tools/ctdb.c @@ -233,7 +233,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv if(options.machinereadable){ printf(":Node:IP:Disonnected:Disabled:Permanently Disabled:\n"); for(i=0;inum;i++){ - printf(":%d:%s:%d:%d:%d:\n", nodemap->nodes[i].vnn, + printf(":%d:%s:%d:%d:%d:\n", nodemap->nodes[i].pnn, inet_ntoa(nodemap->nodes[i].sin.sin_addr), !!(nodemap->nodes[i].flags&NODE_FLAGS_DISCONNECTED), !!(nodemap->nodes[i].flags&NODE_FLAGS_UNHEALTHY), @@ -270,10 +270,10 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv flags_str = talloc_strdup(ctdb, "OK"); CTDB_NO_MEMORY_FATAL(ctdb, flags_str); } - printf("vnn:%d %-16s %s%s\n", nodemap->nodes[i].vnn, + printf("pnn:%d %-16s %s%s\n", nodemap->nodes[i].pnn, inet_ntoa(nodemap->nodes[i].sin.sin_addr), flags_str, - nodemap->nodes[i].vnn == myvnn?" (THIS NODE)":""); + nodemap->nodes[i].pnn == myvnn?" 
(THIS NODE)":""); talloc_free(flags_str); } @@ -315,22 +315,26 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv static int control_get_tickles(struct ctdb_context *ctdb, int argc, const char **argv) { struct ctdb_control_tcp_tickle_list *list; - uint32_t vnn; + struct sockaddr_in ip; int i, ret; if (argc < 1) { usage(); } - vnn = strtoul(argv[0], NULL, 0); + ip.sin_family = AF_INET; + if (inet_aton(argv[0], &ip.sin_addr) == 0) { + DEBUG(0,("Wrongly formed ip address '%s'\n", argv[0])); + return -1; + } - ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.vnn, ctdb, vnn, &list); + ret = ctdb_ctrl_get_tcp_tickles(ctdb, TIMELIMIT(), options.vnn, ctdb, &ip, &list); if (ret == -1) { DEBUG(0, ("Unable to list tickles\n")); return -1; } - printf("Tickles for vnn:%u\n", list->vnn); + printf("Tickles for ip:%s\n", inet_ntoa(list->ip.sin_addr)); printf("Num tickles:%u\n", list->tickles.num); for (i=0;itickles.num;i++) { printf("SRC: %s:%u ", inet_ntoa(list->tickles.connections[i].saddr.sin_addr), ntohs(list->tickles.connections[i].saddr.sin_port)); @@ -540,17 +544,17 @@ static int control_ip(struct ctdb_context *ctdb, int argc, const char **argv) for(i=0;inum;i++){ printf(":%s:%d:\n", inet_ntoa(ips->ips[i].sin.sin_addr), - ips->ips[i].takeover_vnn); + ips->ips[i].pnn); } return 0; } - printf("Number of nodes:%d\n", ips->num); + printf("Number of addresses:%d\n", ips->num); for(i=0;inum;i++){ printf("%-16s %d\n", inet_ntoa(ips->ips[i].sin.sin_addr), - ips->ips[i].takeover_vnn); + ips->ips[i].pnn); } return 0; @@ -1067,7 +1071,7 @@ static const struct { { "thaw", control_thaw, true, "thaw all databases" }, { "killtcp", kill_tcp, false, "kill a tcp connection.", " " }, { "tickle", tickle_tcp, false, "send a tcp tickle ack", " " }, - { "gettickles", control_get_tickles, false, "get the list of tickles registered for this vnn", "" }, + { "gettickles", control_get_tickles, false, "get the list of tickles registered for this ip", "" }, { 
"regsrvid", regsrvid, false, "register a server id", " " }, { "unregsrvid", unregsrvid, false, "unregister a server id", " " }, diff --git a/ctdb/web/configuring.html b/ctdb/web/configuring.html index 219edac19b9..825605c234b 100644 --- a/ctdb/web/configuring.html +++ b/ctdb/web/configuring.html @@ -38,7 +38,6 @@ The most important options are:

  • CTDB_NODES
  • CTDB_RECOVERY_LOCK -
  • CTDB_PUBLIC_INTERFACE
  • CTDB_PUBLIC_ADDRESSES
@@ -82,34 +81,12 @@ Content of /etc/ctdb/nodes: The default for this file is /etc/ctdb/nodes. -

CTDB_PUBLIC_INTERFACE

- -This parameter is used to tell CTDB which network interface is used to -hold the public ip addresses when CTDB is used to manage IP -takeover.

- -This can be the same network interface as is used for the private -addresses in the CTDB_NODES list but it is recommended that you use a -different interface.

- -Example using eth0 for the public interface: -

-  CTDB_PUBLIC_INTERFACE=eth0
-
- -It is strongly recommended that you use CTDB with IP takeover.

- -When you use this parameter you must also specify the -CTDB_PUBLIC_ADDRESSES parameter. -

CTDB_PUBLIC_ADDRESSES

-In order to use IP takeover you must specify a file containing a list -of public IP addresses. One IP address for each node.

+This file specifies a list of public ip addresses which the cluster will +serve. This file must be the same on all nodes.

-This file contains a list of public cluster addresses.

- These are the addresses that the SMBD daemons and other services will bind to and which clients will use to connect to the cluster. This file must contain one address for each node, i.e. it must have the @@ -122,10 +99,10 @@ Example 4 node cluster: Content of /etc/ctdb/public_addresses:

- 192.168.1.1/24
- 192.168.1.2/24
- 192.168.2.1/24
- 192.168.2.2/24
+ 192.168.1.1/24 eth0
+ 192.168.1.2/24 eth0
+ 192.168.2.1/24 eth1
+ 192.168.2.2/24 eth1
 
These are the IP addresses that you should configure in DNS for the @@ -138,28 +115,19 @@ cluster.

The CTDB cluster utilizes IP takeover techniques to ensure that as long as at least one node in the cluster is available, all the public IP addresses will always be available to clients.

-This means that if one physical node fails, the public address of that -node will be taken over by a different node in the cluster. This +This means that if one physical node fails, the public addresses that +node was serving will be taken over by a different node in the cluster. This provides a guarantee that all ip addresses exposed to clients will always be reachable by clients even if a node has been powered off or has crashed.

-CTDB nodes will only take over IP addresses that are inside the same -subnet as its own public IP address. In the example above, nodes 0 and -1 would be able to take over each others public ip and analog for -nodes 2 and 3, but node 0 and 1 would NOT be able to take over the IP -addresses for nodes 2 or 3 since they are on a different -subnet.

- Do not assign these addresses to any of the interfaces on the host. CTDB will add and remove these addresses automatically at runtime.

This parameter is used when CTDB operated in takeover ip mode.

-The usual location for this file is /etc/ctdb/public_addresses. If you -use this you must also specify the -CTDB_PUBLIC_INTERFACE parameter.

+The usual location for this file is /etc/ctdb/public_addresses.

Event scripts