mirror of
https://github.com/samba-team/samba.git
synced 2025-01-11 05:18:09 +03:00
Add an initial test version of an IP multiplex tool that allows us
to have one single public IP address for the entire cluster. This IP address is attached to lo on all nodes, but only the recmaster will respond to ARP requests for this address. The recmaster then runs an ipmux process that will pass any incoming packets for this IP address on to the other nodes in the cluster, based on the IP address of the client host. To use this feature one must: 1, have one fixed IP address in the customer's network permanently attached to an interface; 2, set CTDB_PUBLIC_INTERFACE= to specify on which interface the clients attach to the node; 3, set CTDB_SINGLE_PUBLIC_IP=ip-address to specify which IP address should be the "single public ip address". To test with only one single client, attach several IP addresses to the client and ping the public address from the client with different -I options. Look in a network trace to see which node the packet is passed on to. (This used to be ctdb commit 50d648c95e4e6d7c2867a034c2b550086d853320)
This commit is contained in:
parent
ab5d098bf6
commit
292e9d9109
@ -54,7 +54,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
|
||||
TEST_BINS=bin/ctdb_bench bin/ctdb_fetch bin/ctdb_store bin/ctdb_persistent bin/rb_test \
|
||||
@INFINIBAND_BINS@
|
||||
|
||||
BINS = bin/ctdb @CTDB_SCSI_IO@ bin/smnotify
|
||||
BINS = bin/ctdb @CTDB_SCSI_IO@ @CTDB_IPMUX@ bin/smnotify
|
||||
SBINS = bin/ctdbd
|
||||
|
||||
DIRS = lib bin
|
||||
@ -84,6 +84,10 @@ bin/scsi_io: $(CTDB_CLIENT_OBJ) utils/scsi_io/scsi_io.o
|
||||
@echo Linking $@
|
||||
@$(CC) $(CFLAGS) -o $@ utils/scsi_io/scsi_io.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
|
||||
|
||||
bin/ctdb_ipmux: $(CTDB_CLIENT_OBJ) utils/ipmux/ipmux.o
|
||||
@echo Linking $@
|
||||
@$(CC) $(CFLAGS) -o $@ utils/ipmux/ipmux.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS) -lipq
|
||||
|
||||
bin/ctdb: $(CTDB_CLIENT_OBJ) tools/ctdb.o
|
||||
@echo Linking $@
|
||||
@$(CC) $(CFLAGS) -o $@ tools/ctdb.o $(CTDB_CLIENT_OBJ) $(LIB_FLAGS)
|
||||
@ -156,6 +160,7 @@ install: all
|
||||
mkdir -p $(DESTDIR)$(etcdir)/ctdb
|
||||
mkdir -p $(DESTDIR)$(etcdir)/ctdb/events.d
|
||||
${INSTALLCMD} -m 755 bin/ctdb $(DESTDIR)$(bindir)
|
||||
${INSTALLCMD} -m 755 bin/ctdb_ipmux $(DESTDIR)$(bindir)
|
||||
${INSTALLCMD} -m 755 bin/ctdbd $(DESTDIR)$(sbindir)
|
||||
${INSTALLCMD} -m 755 bin/smnotify $(DESTDIR)$(bindir)
|
||||
${INSTALLCMD} -m 644 include/ctdb.h $(DESTDIR)$(includedir)
|
||||
@ -169,6 +174,7 @@ install: all
|
||||
${INSTALLCMD} -m 755 config/events.d/50.samba $(DESTDIR)$(etcdir)/ctdb/events.d
|
||||
${INSTALLCMD} -m 755 config/events.d/60.nfs $(DESTDIR)$(etcdir)/ctdb/events.d
|
||||
${INSTALLCMD} -m 755 config/events.d/61.nfstickle $(DESTDIR)$(etcdir)/ctdb/events.d
|
||||
${INSTALLCMD} -m 755 config/events.d/90.ipmux $(DESTDIR)$(etcdir)/ctdb/events.d
|
||||
${INSTALLCMD} -m 755 tools/ctdb_diagnostics $(DESTDIR)$(bindir)
|
||||
${INSTALLCMD} -m 755 tools/onnode.ssh $(DESTDIR)$(bindir)
|
||||
${INSTALLCMD} -m 755 tools/onnode.rsh $(DESTDIR)$(bindir)
|
||||
|
64
ctdb/config/events.d/90.ipmux
Executable file
64
ctdb/config/events.d/90.ipmux
Executable file
@ -0,0 +1,64 @@
|
||||
#!/bin/sh
# script to manage the ip multiplexer for a single public address cluster
#
# The script is a no-op unless both CTDB_SINGLE_PUBLIC_IP and
# CTDB_PUBLIC_INTERFACE are set in the ctdb configuration.
#
# $1 is the event name; only "startup" and "recovered" do any work.

. /etc/ctdb/functions
loadconfig ctdb

[ -z "$CTDB_SINGLE_PUBLIC_IP" ] && exit 0
[ -z "$CTDB_PUBLIC_INTERFACE" ] && exit 0

cmd="$1"
shift

PATH=/usr/bin:/bin:/usr/sbin:/sbin:$PATH

case "$cmd" in
     startup)
	modprobe ip_queue
	# attach the address with scope host; failures (e.g. the address
	# already being present) are deliberately ignored
	ip addr add "$CTDB_SINGLE_PUBLIC_IP/32" dev lo scope host >/dev/null 2>/dev/null

	# do not respond to ARPs that are for ip addresses with scope 'host'
	echo 3 > /proc/sys/net/ipv4/conf/all/arp_ignore
	;;

     shutdown)
	;;

     takeip)
	;;

     releaseip)
	;;

     recovered)
	# remove any previous rule for queueing
	iptables -D INPUT -d "$CTDB_SINGLE_PUBLIC_IP" -i "$CTDB_PUBLIC_INTERFACE" -j QUEUE >/dev/null 2>/dev/null

	# kill off any ipmux processes
	killall -9 ctdb_ipmux >/dev/null 2>/dev/null

	# are we the recmaster ?
	# NOTE(review): the block below runs when 'ctdb isnotrecmaster'
	# exits non-zero and is meant for nodes that must NOT answer ARPs;
	# confirm the exit status convention of 'ctdb isnotrecmaster'.
	ctdb isnotrecmaster >/dev/null 2>/dev/null || {
		# change the ip address to have scope host so we wont respond
		# to arps
		ip addr del "$CTDB_SINGLE_PUBLIC_IP/32" dev lo >/dev/null 2>/dev/null
		ip addr add "$CTDB_SINGLE_PUBLIC_IP/32" dev lo scope host >/dev/null 2>/dev/null
		exit 0
	}

	# change the scope so we start responding to arps
	ip addr del "$CTDB_SINGLE_PUBLIC_IP/32" dev lo >/dev/null 2>/dev/null
	ip addr add "$CTDB_SINGLE_PUBLIC_IP/32" dev lo >/dev/null 2>/dev/null

	# mark all these for queueing
	iptables -I INPUT 1 -d "$CTDB_SINGLE_PUBLIC_IP" -i "$CTDB_PUBLIC_INTERFACE" -j QUEUE >/dev/null 2>/dev/null

	# run the multiplexer in the background so the event script returns
	ctdb_ipmux &
	;;

     monitor)
	;;

esac

exit 0
|
@ -18,11 +18,13 @@ case `uname` in
|
||||
Linux*)
|
||||
CTDB_SYSTEM_OBJ=common/system_linux.o
|
||||
CTDB_SCSI_IO=bin/scsi_io
|
||||
CTDB_IPMUX=bin/ctdb_ipmux
|
||||
CTDB_PCAP_LDFLAGS=
|
||||
;;
|
||||
AIX*)
|
||||
CTDB_SYSTEM_OBJ=common/system_aix.o
|
||||
CTDB_SCSI_IO=
|
||||
CTDB_IPMUX=
|
||||
CTDB_PCAP_LDFLAGS=-lpcap
|
||||
;;
|
||||
*)
|
||||
@ -63,6 +65,7 @@ fi
|
||||
AC_SUBST(EXTRA_OBJ)
|
||||
AC_SUBST(CTDB_SYSTEM_OBJ)
|
||||
AC_SUBST(CTDB_SCSI_IO)
|
||||
AC_SUBST(CTDB_IPMUX)
|
||||
AC_SUBST(CTDB_PCAP_LDFLAGS)
|
||||
|
||||
AC_OUTPUT(Makefile)
|
||||
|
@ -100,10 +100,12 @@ fi
|
||||
%{_sysconfdir}/ctdb/events.d/50.samba
|
||||
%{_sysconfdir}/ctdb/events.d/60.nfs
|
||||
%{_sysconfdir}/ctdb/events.d/61.nfstickle
|
||||
%{_sysconfdir}/ctdb/events.d/90.ipmux
|
||||
%{_sysconfdir}/ctdb/statd-callout
|
||||
%{_sbindir}/ctdbd
|
||||
%{_bindir}/ctdb
|
||||
%{_bindir}/smnotify
|
||||
%{_bindir}/ctdb_ipmux
|
||||
%{_bindir}/ctdb_diagnostics
|
||||
%{_bindir}/onnode.ssh
|
||||
%{_bindir}/onnode.rsh
|
||||
|
229
ctdb/utils/ipmux/ipmux.c
Normal file
229
ctdb/utils/ipmux/ipmux.c
Normal file
@ -0,0 +1,229 @@
|
||||
/*
|
||||
simple ip multiplexer
|
||||
|
||||
Copyright (C) Ronnie Sahlberg 2007
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "includes.h"
|
||||
#include "lib/events/events.h"
|
||||
#include "system/filesys.h"
|
||||
#include "system/network.h"
|
||||
#include "popt.h"
|
||||
#include "cmdline.h"
|
||||
#include "ctdb.h"
|
||||
#include "ctdb_private.h"
|
||||
#include <linux/netfilter.h>
|
||||
#include <libipq.h>
|
||||
|
||||
#define CONTROL_TIMEOUT() timeval_current_ofs(5, 0)
|
||||
|
||||
/* one candidate target for multiplexed packets */
struct ipmux_node {
	uint32_t pnn;            /* node number, compared against our own pnn */
	struct sockaddr_in sin;  /* address that packets for this node are sent to */
};

/* table of candidate nodes; allocated and filled in by main() */
struct ipmux_node *ipmux_nodes;
|
||||
|
||||
|
||||
/*
|
||||
main program
|
||||
*/
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
struct ctdb_context *ctdb;
|
||||
struct poptOption popt_options[] = {
|
||||
POPT_AUTOHELP
|
||||
POPT_CTDB_CMDLINE
|
||||
POPT_TABLEEND
|
||||
};
|
||||
int opt;
|
||||
const char **extra_argv;
|
||||
int extra_argc = 0;
|
||||
int ret;
|
||||
poptContext pc;
|
||||
struct event_context *ev;
|
||||
uint32_t mypnn, recmaster;
|
||||
TALLOC_CTX *mem_ctx=NULL;
|
||||
struct ctdb_node_map *nodemap;
|
||||
int i, num_nodes;
|
||||
int s;
|
||||
struct ipq_handle *ipqh;
|
||||
#define PKTSIZE 65535
|
||||
unsigned char pktbuf[PKTSIZE];
|
||||
ipq_packet_msg_t *ipqp;
|
||||
struct iphdr *ip;
|
||||
int hash;
|
||||
|
||||
pc = poptGetContext(argv[0], argc, argv, popt_options, POPT_CONTEXT_KEEP_FIRST);
|
||||
|
||||
while ((opt = poptGetNextOpt(pc)) != -1) {
|
||||
switch (opt) {
|
||||
default:
|
||||
fprintf(stderr, "Invalid option %s: %s\n",
|
||||
poptBadOption(pc, 0), poptStrerror(opt));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/* talloc_enable_leak_report_full(); */
|
||||
|
||||
/* setup the remaining options for the main program to use */
|
||||
extra_argv = poptGetArgs(pc);
|
||||
if (extra_argv) {
|
||||
extra_argv++;
|
||||
while (extra_argv[extra_argc]) extra_argc++;
|
||||
}
|
||||
|
||||
ev = event_context_init(NULL);
|
||||
|
||||
ctdb = ctdb_cmdline_client(ev);
|
||||
|
||||
|
||||
mem_ctx = talloc_new(ctdb);
|
||||
|
||||
/* get our pnn */
|
||||
mypnn = ctdb_ctrl_getpnn(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE);
|
||||
if (mypnn == (uint32_t)-1) {
|
||||
DEBUG(0,("IPMUX: Failed to get local pnn - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* get the recmaster */
|
||||
ret = ctdb_ctrl_getrecmaster(ctdb, mem_ctx, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, &recmaster);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("IPMUX: Failed to get recmaster - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* verify we are the recmaster */
|
||||
if (recmaster != mypnn) {
|
||||
DEBUG(0,("IPMUX: we are not the recmaster - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* get the list of nodes */
|
||||
ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), CTDB_CURRENT_NODE, mem_ctx, &nodemap);
|
||||
if (ret != 0) {
|
||||
DEBUG(0,("IPMUX: failed to get the nodemap - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* count how many connected nodes we have */
|
||||
num_nodes = 0;
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
|
||||
num_nodes++;
|
||||
}
|
||||
if (num_nodes == 0) {
|
||||
DEBUG(0,("IPMUX: no connected nodes - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
ipmux_nodes = talloc_array(mem_ctx, struct ipmux_node, num_nodes);
|
||||
if (ipmux_nodes == NULL) {
|
||||
DEBUG(0,("IPMUX: failed to allocate ipmux node array - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* populate the ipmux node array */
|
||||
num_nodes = 0;
|
||||
for (i=0; i<nodemap->num; i++) {
|
||||
if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
|
||||
continue;
|
||||
}
|
||||
ipmux_nodes[num_nodes].pnn = i;
|
||||
ipmux_nodes[num_nodes].sin = nodemap->nodes[i].sin;
|
||||
num_nodes++;
|
||||
}
|
||||
|
||||
|
||||
/* open a raw socket to send the packets out through */
|
||||
s = ctdb_sys_open_sending_socket();
|
||||
if (s == -1) {
|
||||
DEBUG(0,("IPMUX: failed to open raw socket - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
|
||||
/* open the ipq handle */
|
||||
ipqh = ipq_create_handle(0, PF_INET);
|
||||
if (ipqh == NULL) {
|
||||
DEBUG(0,("IPMUX: failed to create ipq handle - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
ret = ipq_set_mode(ipqh, IPQ_COPY_PACKET, PKTSIZE);
|
||||
if (ret < 0) {
|
||||
DEBUG(0,("IPMUX: failed to set ipq mode. make sure the ip_queue module is loaded - exiting\n"));
|
||||
talloc_free(mem_ctx);
|
||||
exit(10);
|
||||
}
|
||||
|
||||
while (1) {
|
||||
/* wait for the next packet */
|
||||
ret = ipq_read(ipqh, pktbuf, PKTSIZE, 0);
|
||||
if (ret <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* read the packet */
|
||||
ipqp = ipq_get_packet(pktbuf);
|
||||
if (ipqp == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* calculate a hash based on the clients ip address */
|
||||
ip = (struct iphdr *)&ipqp->payload[0];
|
||||
/* ntohl here since the client ip addresses are much more
|
||||
likely to differ in the lower bits than the hight bits */
|
||||
hash = ntohl(ip->saddr) % num_nodes;
|
||||
|
||||
|
||||
/* if the packet is hashed to the current host, then
|
||||
just accept it and let the kernel pass it onto
|
||||
the local stack
|
||||
*/
|
||||
if (ipmux_nodes[hash].pnn == mypnn) {
|
||||
ipq_set_verdict(ipqh, ipqp->packet_id, NF_ACCEPT, 0, pktbuf);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* we have hashed it to one of the other nodes, so
|
||||
send the packet off and tell the kernel to not worry
|
||||
about this packet any more
|
||||
*/
|
||||
ret = sendto(s, &ipqp->payload[0], ipqp->data_len, 0, &ipmux_nodes[hash].sin, sizeof(struct sockaddr_in));
|
||||
ipq_set_verdict(ipqh, ipqp->packet_id, NF_STOLEN, 0, pktbuf);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user