[MEDIUM] add support for binding to source port ranges during connect

Some users are already hitting the 64k source port limit when
connecting to servers. The system usually maintains a list of
unused source ports, regardless of the source IP they're bound
to. So in order to go beyond the 64k concurrent connections, we
have to manage the source ip:port lists ourselves.

The solution consists of assigning a source port range to each
server and using a free port in that range when connecting to that
server, either for a proxied connection or for a health check.
The port must then be put back into the server's range when the
connection is closed.

This mechanism is used only when a port range is specified on
a server. It makes it possible to reach 64k connections per
server, possibly all from the same IP address. Right now it
should be more than enough even for huge deployments.
This commit is contained in:
Willy Tarreau 2009-06-10 11:09:37 +02:00
parent f68da4603a
commit c6f4ce8fc4
11 changed files with 307 additions and 10 deletions

View File

@ -3397,7 +3397,9 @@ source <addr>[:<port>] [interface <name>]
<port> is an optional port. It is normally not needed but may be useful
in some very specific contexts. The default value of zero means
the system will select a free port.
the system will select a free port. Note that port ranges are not
supported in the backend. If you want to force port ranges, you
have to specify them on each "server" line.
<addr2> is the IP address to present to the server when connections are
forwarded in full transparent proxy mode. This is currently only
@ -4372,13 +4374,21 @@ slowstart <start_time_in_ms>
trouble to running servers. It only applies when a server has been previously
seen as failed.
source <addr>[:<port>] [usesrc { <addr2>[:<port2>] | client | clientip } ]
source <addr>[:<port>] [interface <name>] ...
source <addr>[:<pl>[-<ph>]] [usesrc { <addr2>[:<port2>] | client | clientip } ]
source <addr>[:<pl>[-<ph>]] [interface <name>] ...
The "source" parameter sets the source address which will be used when
connecting to the server. It follows the exact same parameters and principle
as the backend "source" keyword, except that it only applies to the server
referencing it. Please consult the "source" keyword for details.
Additionally, the "source" statement on a server line allows one to specify a
source port range by indicating the lower and higher bounds delimited by a
dash ('-'). Some operating systems might require a valid IP address when a
source port range is specified. It is permitted to have the same IP/range for
several servers. Doing so makes it possible to bypass the maximum of 64k
total concurrent connections. The limit will then reach 64k connections per
server.
track [<proxy>/]<server>
This option enables ability to set the current state of the server by
tracking another one. Only a server with checks enabled can be tracked

View File

@ -2,7 +2,7 @@
include/common/standard.h
This file contains some general purpose functions and macros.
Copyright (C) 2000-2008 Willy Tarreau - w@1wt.eu
Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@ -168,6 +168,17 @@ struct sockaddr_un *str2sun(const char *str);
*/
struct sockaddr_in *str2sa(char *str);
/*
* converts <str> to a struct sockaddr_in* which is locally allocated, and a
* port range consisting of two integers. The low and high ends are always set
* even if the port is unspecified, in which case (0,0) is returned. The low
* port is set in the sockaddr_in. Thus, it is enough to check the size of the
* returned range to know if an array must be allocated or not. The format is
* "addr[:port[-port]]", where "addr" can be a dotted IPv4 address, a host
* name, or empty or "*" to indicate INADDR_ANY.
*/
struct sockaddr_in *str2sa_range(char *str, int *low, int *high);
/*
* converts <str> to two struct in_addr* which must be pre-allocated.
* The format is "addr[/mask]", where "addr" cannot be empty, and mask

View File

@ -0,0 +1,77 @@
/*
include/proto/port_range.h
This file defines everything needed to manage port ranges
Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, version 2.1
exclusively.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _PROTO_PORT_RANGE_H
#define _PROTO_PORT_RANGE_H
#include <types/port_range.h>
/* Take the next free port out of <range>. Returns that port, or zero when
 * the range has no available port left. The read position wraps back to the
 * start of the array once it reaches the range size.
 */
static inline int port_range_alloc_port(struct port_range *range)
{
	int port = 0;

	if (range->avail) {
		port = range->ports[range->get++];
		if (range->get >= range->size)
			range->get = 0;
		range->avail--;
	}
	return port;
}
/* Give port <port> back to port range <range>. Nothing is done when <port>
 * is zero or when <range> is NULL. The caller is responsible for marking the
 * port unused afterwards, either by setting the port to zero or the range to
 * NULL. The write position wraps back to the start of the array once it
 * reaches the range size.
 */
static inline void port_range_release_port(struct port_range *range, int port)
{
	if (range && port) {
		range->ports[range->put++] = port;
		if (range->put >= range->size)
			range->put = 0;
		range->avail++;
	}
}
/* Return a new initialized port range of <n> ports, or NULL if <n> is
 * negative or if the allocation fails. The structure is zeroed, then <size>
 * and <avail> are both set to <n>. The ports themselves are not filled in,
 * it's up to the caller to do it.
 */
static inline struct port_range *port_range_alloc_range(int n)
{
	struct port_range *ret;

	/* a negative count would wrap around in the size computation below */
	if (n < 0)
		return NULL;

	ret = calloc(1, sizeof(*ret) + n * sizeof(ret->ports[0]));
	if (!ret)
		return NULL;

	ret->size = ret->avail = n;
	return ret;
}
#endif /* _PROTO_PORT_RANGE_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/

View File

@ -70,6 +70,8 @@ struct fdtab {
unsigned char ev; /* event seen in return of poll() : FD_POLL_* */
struct sockaddr *peeraddr; /* pointer to peer's network address, or NULL if unset */
socklen_t peerlen; /* peer's address length, or 0 if unset */
int local_port; /* optional local port */
struct port_range *port_range; /* optional port range to bind to */
};
/*

View File

@ -0,0 +1,40 @@
/*
include/types/port_range.h
This file defines everything needed to manage port ranges
Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, version 2.1
exclusively.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _TYPES_PORT_RANGE_H
#define _TYPES_PORT_RANGE_H
#include <netinet/in.h>
/* Ring of source ports: ports are read at index <get> and written back at
 * index <put>, both wrapping at <size>.
 */
struct port_range {
	int size;          /* range size (number of entries in <ports>) */
	int get;           /* position of the next port to hand out */
	int put;           /* position where the next released port is stored */
	int avail;         /* number of available ports left */
	uint16_t ports[0]; /* array of <size> ports, in host byte order */
};
#endif /* _TYPES_PORT_RANGE_H */
/*
* Local variables:
* c-indent-level: 8
* c-basic-offset: 8
* End:
*/

View File

@ -31,6 +31,7 @@
#include <types/buffers.h>
#include <types/freq_ctr.h>
#include <types/port_range.h>
#include <types/proxy.h>
#include <types/queue.h>
#include <types/task.h>
@ -94,6 +95,7 @@ struct server {
#endif
int iface_len; /* bind interface name length */
char *iface_name; /* bind interface name or NULL */
struct port_range *sport_range; /* optional per-server TCP source ports */
struct server *tracknext, *tracked; /* next server in a tracking list, tracked server */
char *trackit; /* temporary variable to make assignment deferrable */

View File

@ -33,6 +33,7 @@
#include <proto/fd.h>
#include <proto/httperr.h>
#include <proto/log.h>
#include <proto/port_range.h>
#include <proto/proto_http.h>
#include <proto/proto_tcp.h>
#include <proto/queue.h>
@ -1812,9 +1813,44 @@ int connect_server(struct session *s)
if (s->srv->iface_name)
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
#endif
ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
if (s->srv->sport_range) {
int attempts = 10; /* should be more than enough to find a spare port */
struct sockaddr_in src;
ret = 1;
src = s->srv->source_addr;
do {
/* note: in case of retry, we may have to release a previously
* allocated port, hence this loop's construct.
*/
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
if (!attempts)
break;
attempts--;
fdtab[fd].local_port = port_range_alloc_port(s->srv->sport_range);
if (!fdtab[fd].local_port)
break;
fdtab[fd].port_range = s->srv->sport_range;
src.sin_port = htons(fdtab[fd].local_port);
ret = tcpv4_bind_socket(fd, flags, &src, remote);
} while (ret != 0); /* binding NOK */
}
else {
ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
}
if (ret) {
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd);
if (ret == 1) {
Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
s->be->id, s->srv->id);
@ -1887,6 +1923,8 @@ int connect_server(struct session *s)
msg = "local address already in use";
qfprintf(stderr,"Cannot connect: %s.\n",msg);
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd);
send_log(s->be, LOG_EMERG,
"Connect() failed for server %s/%s: %s.\n",
@ -1894,11 +1932,15 @@ int connect_server(struct session *s)
return SN_ERR_RESOURCE;
} else if (errno == ETIMEDOUT) {
//qfprintf(stderr,"Connect(): ETIMEDOUT");
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd);
return SN_ERR_SRVTO;
} else {
// (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
//qfprintf(stderr,"Connect(): %d", errno);
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd);
return SN_ERR_SRVCL;
}

View File

@ -40,6 +40,7 @@
#include <proto/dumpstats.h>
#include <proto/httperr.h>
#include <proto/log.h>
#include <proto/port_range.h>
#include <proto/protocols.h>
#include <proto/proto_tcp.h>
#include <proto/proto_http.h>
@ -2179,18 +2180,34 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int inv)
cur_arg += 1;
}
else if (!strcmp(args[cur_arg], "source")) { /* address to which we bind when connecting */
int port_low, port_high;
if (!*args[cur_arg + 1]) {
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], and optional '%s' <addr> as argument.\n",
Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>[-<port>]], and optional '%s' <addr> as argument.\n",
file, linenum, "source", "usesrc");
#else
Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>] as argument.\n",
Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>[-<port>]] as argument.\n",
file, linenum, "source");
#endif
return -1;
}
newsrv->state |= SRV_BIND_SRC;
newsrv->source_addr = *str2sa(args[cur_arg + 1]);
newsrv->source_addr = *str2sa_range(args[cur_arg + 1], &port_low, &port_high);
if (port_low != port_high) {
int i;
if (port_low <= 0 || port_low > 65535 ||
port_high <= 0 || port_high > 65535 ||
port_low > port_high) {
Alert("parsing [%s:%d] : invalid source port range %d-%d.\n",
file, linenum, port_low, port_high);
return -1;
}
newsrv->sport_range = port_range_alloc_range(port_high - port_low + 1);
for (i = 0; i < newsrv->sport_range->size; i++)
newsrv->sport_range->ports[i] = port_low + i;
}
cur_arg += 2;
while (*(args[cur_arg])) {
if (!strcmp(args[cur_arg], "usesrc")) { /* address to use outside */

View File

@ -36,6 +36,7 @@
#include <proto/fd.h>
#include <proto/log.h>
#include <proto/queue.h>
#include <proto/port_range.h>
#include <proto/proto_http.h>
#include <proto/proto_tcp.h>
#include <proto/proxy.h>
@ -597,7 +598,38 @@ struct task *process_chk(struct task *t)
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
s->iface_name, s->iface_len + 1);
#endif
ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
if (s->sport_range) {
int bind_attempts = 10; /* should be more than enough to find a spare port */
struct sockaddr_in src;
ret = 1;
src = s->source_addr;
do {
/* note: in case of retry, we may have to release a previously
* allocated port, hence this loop's construct.
*/
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
if (!bind_attempts)
break;
bind_attempts--;
fdtab[fd].local_port = port_range_alloc_port(s->sport_range);
if (!fdtab[fd].local_port)
break;
fdtab[fd].port_range = s->sport_range;
src.sin_port = htons(fdtab[fd].local_port);
ret = tcpv4_bind_socket(fd, flags, &src, remote);
} while (ret != 0); /* binding NOK */
}
else {
ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
}
if (ret) {
s->result |= SRV_CHK_ERROR;
switch (ret) {
@ -682,6 +714,8 @@ struct task *process_chk(struct task *t)
}
}
}
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd); /* socket creation error */
}

View File

@ -19,6 +19,7 @@
#include <common/config.h>
#include <proto/fd.h>
#include <proto/port_range.h>
struct fdtab *fdtab = NULL; /* array of all the file descriptors */
int maxfd; /* # of the highest fd + 1 */
@ -36,6 +37,8 @@ int nbpollers = 0;
void fd_delete(int fd)
{
EV_FD_CLO(fd);
port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
fdtab[fd].port_range = NULL;
close(fd);
fdtab[fd].state = FD_STCLOSE;

View File

@ -1,7 +1,7 @@
/*
* General purpose functions.
*
* Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
* Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@ -240,6 +240,65 @@ struct sockaddr_in *str2sa(char *str)
return &sa;
}
/*
 * converts <str> to a struct sockaddr_in* and a port range made of two
 * integers. The returned pointer designates a static structure which is
 * overwritten by each call. The format is "addr[:port[-port]]", where "addr"
 * can be a dotted IPv4 address, a host name, or empty or "*" to indicate
 * INADDR_ANY.
 *
 * <low> and <high> are always set, including when the internal copy of <str>
 * cannot be allocated. When no port is specified, (0,0) is returned. When a
 * single port is specified, both ends are set to that port. The low port is
 * also set in the sockaddr_in. Thus, it is enough to check the size of the
 * returned range to know if an array must be allocated or not.
 *
 * If the host name cannot be resolved, an alert is emitted and the address
 * is left zeroed.
 */
struct sockaddr_in *str2sa_range(char *str, int *low, int *high)
{
	static struct sockaddr_in sa;
	char *c;
	int portl = 0, porth = 0;

	memset(&sa, 0, sizeof(sa));

	/* the range must be defined even if we leave early on alloc failure,
	 * since callers compare both ends without any other error indication.
	 */
	*low = 0;
	*high = 0;

	/* work on a private copy because the string is split in place */
	str = strdup(str);
	if (str == NULL)
		goto out_nofree;

	if ((c = strrchr(str, ':')) != NULL) {
		char *sep;

		*c++ = '\0';
		sep = strchr(c, '-');
		if (sep)
			*sep++ = '\0';
		else
			sep = c; /* single port: both ends are the same */

		portl = atol(c);
		porth = atol(sep);
	}

	if (*str == '*' || *str == '\0') { /* INADDR_ANY */
		sa.sin_addr.s_addr = INADDR_ANY;
	}
	else if (!inet_pton(AF_INET, str, &sa.sin_addr)) {
		/* not a dotted IPv4 address, try to resolve it as a host name */
		struct hostent *he;

		if ((he = gethostbyname(str)) == NULL) {
			Alert("Invalid server name: '%s'\n", str);
		}
		else
			sa.sin_addr = *(struct in_addr *) *(he->h_addr_list);
	}
	sa.sin_port = htons(portl);
	sa.sin_family = AF_INET;

	*low = portl;
	*high = porth;

	free(str);
 out_nofree:
	return &sa;
}
/*
* converts <str> to two struct in_addr* which must be pre-allocated.
* The format is "addr[/mask]", where "addr" cannot be empty, and mask