From 2fe7dd0b2ee00d43c8fc0e1a5b3ebaf5b4d2543a Mon Sep 17 00:00:00 2001 From: William Lallemand Date: Tue, 11 Sep 2018 16:51:29 +0200 Subject: [PATCH] MEDIUM: protocol: sockpair protocol This protocol is based on the uxst one, but it uses socketpair and FD passing insteads of a connect()/accept(). The "sockpair@" prefix has been implemented for both bind and server keywords. When HAProxy wants to connect through a sockpair@, it creates 2 new sockets using the socketpair() syscall and pass one of the socket through the FD specified on the server line. On the bind side, haproxy will receive the FD, and will use it like it was the FD of an accept() syscall. This protocol was designed for internal communication within HAProxy between the master and the workers, but it's possible to use it externaly with a wrapper and pass the FD through environment variabls. --- Makefile | 3 +- doc/configuration.txt | 10 + include/common/standard.h | 3 +- include/proto/proto_sockpair.h | 28 +++ src/cfgparse.c | 14 ++ src/listener.c | 7 + src/proto_sockpair.c | 405 +++++++++++++++++++++++++++++++++ src/standard.c | 17 +- 8 files changed, 484 insertions(+), 3 deletions(-) create mode 100644 include/proto/proto_sockpair.h create mode 100644 src/proto_sockpair.c diff --git a/Makefile b/Makefile index d68d26368..4b5cc4072 100644 --- a/Makefile +++ b/Makefile @@ -876,7 +876,8 @@ OBJS = src/proto_http.o src/cfgparse.o src/server.o src/stream.o \ src/sha1.o src/hpack-tbl.o src/hpack-enc.o src/uri_auth.o \ src/time.o src/proto_udp.o src/arg.o src/signal.o \ src/protocol.o src/lru.o src/hdr_idx.o src/hpack-huff.o \ - src/mailers.o src/h2.o src/base64.o src/hash.o src/http.o + src/mailers.o src/h2.o src/base64.o src/hash.o src/http.o \ + src/proto_sockpair.o EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o $(EBTREE_DIR)/eb32sctree.o \ $(EBTREE_DIR)/eb32tree.o $(EBTREE_DIR)/eb64tree.o \ diff --git a/doc/configuration.txt b/doc/configuration.txt index 2332bb7d8..31974cc2a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -2561,6 +2561,10 @@ bind / [, ...] [param*] - 'fd@' -> use file descriptor inherited from the parent. The fd must be bound and may or may not already be listening. + - 'sockpair@'-> like fd@ but you must use the fd of a + connected unix socket or of a socketpair. The bind waits + to receive a FD over the unix socket and uses it as if it + was the FD of an accept(). Should be used carefully. You may want to reference some environment variables in the address parameter, see section 2.3 about environment variables. @@ -7746,6 +7750,12 @@ server
[:[port]] [param*] - 'ipv6@' -> address is always IPv6 - 'unix@' -> address is a path to a local unix socket - 'abns@' -> address is in abstract namespace (Linux only) + - 'sockpair@' -> address is the FD of a connected unix + socket or of a socketpair. During a connection, the + backend creates a pair of connected sockets, and passes + one of them over the FD. The bind part will use the + received socket as the client FD. Should be used + carefully. You may want to reference some environment variables in the address parameter, see section 2.3 about environment variables. The "init-addr" setting can be used to modify the way diff --git a/include/common/standard.h b/include/common/standard.h index 4a7b2a333..ad1a76748 100644 --- a/include/common/standard.h +++ b/include/common/standard.h @@ -38,6 +38,7 @@ #include #include #include +#include #ifndef LLONG_MAX # define LLONG_MAX 9223372036854775807LL @@ -956,7 +957,7 @@ static inline int is_inet_addr(const struct sockaddr_storage *addr) */ static inline int is_addr(const struct sockaddr_storage *addr) { - if (addr->ss_family == AF_UNIX) + if (addr->ss_family == AF_UNIX || addr->ss_family == AF_CUST_SOCKPAIR) return 1; else return is_inet_addr(addr); diff --git a/include/proto/proto_sockpair.h b/include/proto/proto_sockpair.h new file mode 100644 index 000000000..4692461ba --- /dev/null +++ b/include/proto/proto_sockpair.h @@ -0,0 +1,28 @@ +/* + * Socket Pair protocol layer (sockpair) + * + * Copyright HAProxy Technologies - William Lallemand + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROTO_PROTO_SOCKPAIR_H +# define _PROTO_PROTO_SOCKPAIR_H + +int recv_fd_uxst(int sock); +int send_fd_uxst(int fd, int send_fd); + +#endif /* _PROTO_PROTO_SOCKPAIR_H */ + diff --git a/src/cfgparse.c b/src/cfgparse.c index 2ac7b9659..e20ab72da 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -295,6 +295,20 @@ int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, } port = end = get_host_port(ss2); + + } else if (ss2->ss_family == AF_CUST_SOCKPAIR) { + socklen_t addr_len; + inherited = 1; + + fd = ((struct sockaddr_in *)ss2)->sin_addr.s_addr; + addr_len = sizeof(*ss2); + if (getsockname(fd, (struct sockaddr *)ss2, &addr_len) == -1) { + memprintf(err, "cannot use file descriptor '%d' : %s.\n", fd, strerror(errno)); + goto fail; + } + + ss2->ss_family = AF_CUST_SOCKPAIR; /* reassign AF_CUST_SOCKPAIR because of getsockname */ + port = end = 0; } /* OK the address looks correct */ diff --git a/src/listener.c b/src/listener.c index 09ac50bd2..fb2306927 100644 --- a/src/listener.c +++ b/src/listener.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -541,6 +542,12 @@ void listener_accept(int fd) goto end; } + /* with sockpair@ we don't want to do an accept */ + if (unlikely(l->addr.ss_family == AF_CUST_SOCKPAIR)) { + if ((cfd = recv_fd_uxst(fd)) != -1) + fcntl(cfd, F_SETFL, O_NONBLOCK); + } else + #ifdef USE_ACCEPT4 /* only call accept4() if it's known to be safe, otherwise * fallback to the legacy accept() + fcntl(). diff --git a/src/proto_sockpair.c b/src/proto_sockpair.c new file mode 100644 index 000000000..8f4a87462 --- /dev/null +++ b/src/proto_sockpair.c @@ -0,0 +1,405 @@ +/* + * Socket Pair protocol layer (sockpair) + * + * Copyright HAProxy Technologies - William Lallemand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +static void sockpair_add_listener(struct listener *listener, int port); +static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen); +static int sockpair_bind_listeners(struct protocol *proto, char *errmsg, int errlen); +static int sockpair_connect_server(struct connection *conn, int data, int delack); + +/* Note: must not be declared as its list will be overwritten */ +static struct protocol proto_sockpair = { + .name = "sockpair", + .sock_domain = AF_CUST_SOCKPAIR, + .sock_type = SOCK_STREAM, + .sock_prot = 0, + .sock_family = AF_UNIX, + .sock_addrlen = sizeof(struct sockaddr_un), + .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),/* path len */ + .accept = &listener_accept, + .connect = &sockpair_connect_server, + .bind = sockpair_bind_listener, + .bind_all = sockpair_bind_listeners, + .unbind_all = NULL, + .enable_all = enable_all_listeners, + .disable_all = disable_all_listeners, + .get_src = NULL, + .get_dst = NULL, + .pause = NULL, + .add = sockpair_add_listener, + .listeners = LIST_HEAD_INIT(proto_sockpair.listeners), + .nb_listeners = 0, +}; + +/* Add to the list of sockpair listeners (port is ignored). The + * listener's state is automatically updated from LI_INIT to LI_ASSIGNED. + * The number of listeners for the protocol is updated. + */ +static void sockpair_add_listener(struct listener *listener, int port) +{ + if (listener->state != LI_INIT) + return; + listener->state = LI_ASSIGNED; + listener->proto = &proto_sockpair; + LIST_ADDQ(&proto_sockpair.listeners, &listener->proto_list); + proto_sockpair.nb_listeners++; +} + +/* This function creates all UNIX sockets bound to the protocol entry . + * It is intended to be used as the protocol's bind_all() function. + * The sockets will be registered but not added to any fd_set, in order not to + * loose them across the fork(). A call to uxst_enable_listeners() is needed + * to complete initialization. + * + * The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL. + */ +static int sockpair_bind_listeners(struct protocol *proto, char *errmsg, int errlen) +{ + struct listener *listener; + int err = ERR_NONE; + + list_for_each_entry(listener, &proto->listeners, proto_list) { + err |= sockpair_bind_listener(listener, errmsg, errlen); + if (err & ERR_ABORT) + break; + } + return err; +} + +/* This function changes the state from ASSIGNED to LISTEN. The socket is NOT + * enabled for polling. The return value is composed from ERR_NONE, + * ERR_RETRYABLE and ERR_FATAL. It may return a warning or an error message in + * if the message is at most bytes long (including '\0'). + * Note that may be NULL if is also zero. + */ +static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen) +{ + int fd = listener->fd; + int err; + const char *msg = NULL; + + err = ERR_NONE; + + /* ensure we never return garbage */ + if (errlen) + *errmsg = 0; + + if (listener->state != LI_ASSIGNED) + return ERR_NONE; /* already bound */ + + if (listener->fd == -1) { + err |= ERR_FATAL | ERR_ALERT; + msg = "sockpair can be only used with inherited FDs"; + goto err_return; + } + + if (fd >= global.maxsock) { + err |= ERR_FATAL | ERR_ALERT; + msg = "socket(): not enough free sockets, raise -n argument"; + goto err_return; + } + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + err |= ERR_FATAL | ERR_ALERT; + msg = "cannot make sockpair non-blocking"; + goto err_return; + } + + listener->state = LI_LISTEN; + + fd_insert(fd, listener, listener->proto->accept, + listener->bind_conf->bind_thread[relative_pid-1] ? + listener->bind_conf->bind_thread[relative_pid-1] : MAX_THREADS_MASK); + + return err; + + err_return: + if (msg && errlen) + snprintf(errmsg, errlen, "%s [fd %d]", msg, fd); + return err; +} + +/* + * Send FD over a unix socket + * + * is the FD to send + * is the fd of the unix socket to use for the transfer + * + * The iobuf variable could be use in the future to enhance the protocol. + */ +int send_fd_uxst(int fd, int send_fd) +{ + char iobuf[2]; + struct iovec iov; + struct msghdr msghdr; + + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + char buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (void *)buf; + + int *fdptr; + + iov.iov_base = iobuf; + iov.iov_len = sizeof(iobuf); + + memset(&msghdr, 0, sizeof(msghdr)); + msghdr.msg_iov = &iov; + msghdr.msg_iovlen = 1; + + /* Now send the fds */ + msghdr.msg_control = cmsgbuf; + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + + cmsg = CMSG_FIRSTHDR(&msghdr); + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + + fdptr = (int *)CMSG_DATA(cmsg); + memcpy(fdptr, &send_fd, sizeof(send_fd)); + + if (sendmsg(fd, &msghdr, 0) != sizeof(iobuf)) { + ha_warning("Failed to transfer socket\n"); + return 1; + } + + return 0; +} + +/* + * + * This function works like uxst_connect_server but insteads of creating a + * socket and establishing a connection, it creates a pair of connected + * sockets, and send one of them through the destination FD. The destination FD + * is stored in addr.to->sin_addr.s_addr during configuration parsing. + * + * conn->target may point either to a valid server or to a backend, depending + * on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The + * parameter is a boolean indicating whether there are data waiting for + * being sent or not, in order to adjust data write polling and on some + * platforms. The argument is ignored. + * + * Note that a pending send_proxy message accounts for data. + * + * It can return one of : + * - SF_ERR_NONE if everything's OK + * - SF_ERR_SRVTO if there are no more servers + * - SF_ERR_SRVCL if the connection was refused by the server + * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn) + * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...) + * - SF_ERR_INTERNAL for any other purely internal errors + * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted. + * + * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise + * it's invalid and the caller has nothing to do. + */ +static int sockpair_connect_server(struct connection *conn, int data, int delack) +{ + int sv[2], fd, dst_fd = -1; + + /* the FD is stored in the sockaddr struct */ + dst_fd = ((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr; + + conn->flags = 0; + + if (obj_type(conn->target) != OBJ_TYPE_PROXY && + obj_type(conn->target) != OBJ_TYPE_SERVER) { + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + ha_alert("socketpair(): Cannot create socketpair. Giving up.\n"); + conn->flags |= CO_FL_ERROR; + return SF_ERR_RESOURCE; + } + + fd = conn->handle.fd = sv[1]; + + if (fd >= global.maxsock) { + /* do not log anything there, it's a normal condition when this option + * is used to serialize connections to a server ! + */ + ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_CONF_FDLIM; + conn->flags |= CO_FL_ERROR; + return SF_ERR_PRXCOND; /* it is a configuration limit */ + } + + if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { + qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + /* if a send_proxy is there, there are data */ + data |= conn->send_proxy_ofs; + + if (global.tune.server_sndbuf) + setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf)); + + if (global.tune.server_rcvbuf) + setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf)); + + /* The new socket is sent on the other side, it should be retrieved and + * considered as an 'accept' socket on the server side */ + if (send_fd_uxst(dst_fd, sv[0]) == -1) { + close(sv[0]); + close(sv[1]); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + return SF_ERR_INTERNAL; + } + + close(sv[0]); /* we don't need this side anymore */ + + conn->flags &= ~CO_FL_WAIT_L4_CONN; + + conn->flags |= CO_FL_ADDR_TO_SET; + + /* Prepare to send a few handshakes related to the on-wire protocol. */ + if (conn->send_proxy_ofs) + conn->flags |= CO_FL_SEND_PROXY; + + conn_ctrl_init(conn); /* registers the FD */ + fdtab[fd].linger_risk = 0; /* no need to disable lingering */ + + if (conn_xprt_init(conn) < 0) { + conn_full_close(conn); + conn->flags |= CO_FL_ERROR; + return SF_ERR_RESOURCE; + } + + if (conn->flags & (CO_FL_HANDSHAKE | CO_FL_WAIT_L4_CONN)) { + conn_sock_want_send(conn); /* for connect status, proxy protocol or SSL */ + } + else { + /* If there's no more handshake, we need to notify the data + * layer when the connection is already OK otherwise we'll have + * no other opportunity to do it later (eg: health checks). + */ + data = 1; + } + + if (data) + conn_xprt_want_send(conn); /* prepare to send data if any */ + + return SF_ERR_NONE; /* connection is OK */ +} + + +/* + * Receive a file descriptor transfered from a unix socket. + * + * Return -1 or a socket fd; + * + * The iobuf variable could be use in the future to enhance the protocol. + */ +int recv_fd_uxst(int sock) +{ + struct msghdr msghdr; + struct iovec iov; + char iobuf[2]; + + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + char buf[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg = (void *)buf; + + + int recv_fd = -1; + int ret = -1; + + memset(&msghdr, 0, sizeof(msghdr)); + + iov.iov_base = iobuf; + iov.iov_len = sizeof(iobuf); + + msghdr.msg_iov = &iov; + msghdr.msg_iovlen = 1; + + msghdr.msg_control = cmsgbuf; + msghdr.msg_controllen = CMSG_SPACE(sizeof(int)); + + iov.iov_len = sizeof(iobuf); + iov.iov_base = iobuf; + + while (1) { + ret = recvmsg(sock, &msghdr, 0); + if (ret == -1 && errno == EINTR) + continue; + else + break; + } + + if (ret == -1) + return ret; + + cmsg = CMSG_FIRSTHDR(&msghdr); + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_RIGHTS) { + size_t totlen = cmsg->cmsg_len - + CMSG_LEN(0); + memcpy(&recv_fd, CMSG_DATA(cmsg), totlen); + } + return recv_fd; +} + +__attribute__((constructor)) +static void __uxst_protocol_init(void) +{ + protocol_register(&proto_sockpair); +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/standard.c b/src/standard.c index 04df94ead..ae1f54eac 100644 --- a/src/standard.c +++ b/src/standard.c @@ -894,7 +894,22 @@ struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int else ss.ss_family = AF_UNSPEC; - if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) { + if (ss.ss_family == AF_UNSPEC && strncmp(str2, "sockpair@", 9) == 0) { + char *endptr; + + str2 += 9; + + ((struct sockaddr_in *)&ss)->sin_addr.s_addr = strtol(str2, &endptr, 10); + + if (!*str2 || *endptr) { + memprintf(err, "file descriptor '%s' is not a valid integer in '%s'\n", str2, str); + goto out; + } + + ss.ss_family = AF_CUST_SOCKPAIR; + + } + else if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) { char *endptr; str2 += 3;