From 646d23d1b502bc07a4a846f2ca7d332506b3087e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 4 Jan 2018 14:41:00 +0100 Subject: [PATCH 01/52] BUG/MEDIUM: h2: properly handle the END_STREAM flag on empty DATA frames Peter Lindegaard Hansen reported a problem affecting some POST requests sent by MSIE on 1.8.3. Lukas found that we incorrectly dealt with the END_STREAM flag on empty DATA frames. What happens in fact is that while we correctly report that we've read a zero-byte frame, since commit 8fc016d ("BUG/MEDIUM: h2: support uploading partial DATA frames") backported into 1.8.2, we've been able to return without updating the parser's state nor checking the frame flags in this case. The fix is trival, we just need not to return too early. This fix must be backported to 1.8. (cherry picked from commit 4a28da1e9deadf0c542b957a323c1ca015c90fe4) Signed-off-by: Willy Tarreau --- src/mux_h2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mux_h2.c b/src/mux_h2.c index c6e15eca8..7bb51ea41 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -2770,7 +2770,7 @@ static int h2_frt_transfer_data(struct h2s *h2s, struct buffer *buf, int count) flen = h2c->dfl - h2c->dpl; if (!flen) - return 0; + goto end_transfer; if (flen > h2c->dbuf->i) { flen = h2c->dbuf->i; @@ -2817,6 +2817,7 @@ static int h2_frt_transfer_data(struct h2s *h2s, struct buffer *buf, int count) return flen; } + end_transfer: /* here we're done with the frame, all the payload (except padding) was * transferred. */ From 914cf78f2e715db9e33bac0b40a92851af87ea17 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 4 Jan 2018 18:55:19 +0100 Subject: [PATCH 02/52] BUILD: ssl: silence a warning when building without NPN nor ALPN support When building with a library not offering any of these, ssl_conf_cur is not used. Can be backported to 1.8. (cherry picked from commit 5d4cafb6105ac70b57de1df2358efd5ab3844f2f) Signed-off-by: Willy Tarreau --- src/ssl_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ssl_sock.c b/src/ssl_sock.c index f9d5f2567..163b6a13f 100644 --- a/src/ssl_sock.c +++ b/src/ssl_sock.c @@ -4044,7 +4044,7 @@ int ssl_sock_prepare_ctx(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_ struct proxy *curproxy = bind_conf->frontend; int cfgerr = 0; int verify = SSL_VERIFY_NONE; - struct ssl_bind_conf *ssl_conf_cur; + struct ssl_bind_conf __maybe_unused *ssl_conf_cur; const char *conf_ciphers; const char *conf_curves = NULL; From 52a80823e8c2d04635cc95e5d0ca9440a53441cf Mon Sep 17 00:00:00 2001 From: William Lallemand Date: Wed, 3 Jan 2018 19:15:51 +0100 Subject: [PATCH 03/52] BUG/MEDIUM: ssl: cache doesn't release shctx blocks Since the rework of the shctx with the hot list system, the ssl cache was putting session inside the hot list, without removing them. Once all block were used, they were all locked in the hot list, which was forbiding to reuse them for new sessions. Bug introduced by 4f45bb9 ("MEDIUM: shctx: separate ssl and shctx") Thanks to Jeffrey J. Persch for reporting this bug. Must be backported to 1.8. (cherry picked from commit 99b90af6213809a018e89988d7139f7048e97208) Signed-off-by: Willy Tarreau --- src/ssl_sock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ssl_sock.c b/src/ssl_sock.c index 163b6a13f..aecf3ddb7 100644 --- a/src/ssl_sock.c +++ b/src/ssl_sock.c @@ -3849,8 +3849,12 @@ static int sh_ssl_sess_store(unsigned char *s_id, unsigned char *data, int data_ first->len = sizeof(struct sh_ssl_sess_hdr); } - if (shctx_row_data_append(ssl_shctx, first, data, data_len) < 0) + if (shctx_row_data_append(ssl_shctx, first, data, data_len) < 0) { + shctx_row_dec_hot(ssl_shctx, first); return 0; + } + + shctx_row_dec_hot(ssl_shctx, first); return 1; } From 0c9d9a9621577582f18d3176257f26ae36d2acf4 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 4 Jan 2018 19:32:13 +0100 Subject: [PATCH 04/52] BUG/MINOR: lua: Fix default value for pattern in Socket.receive The default value of the pattern in `Socket.receive` is `*l` according to the documentation and in the `socket.tcp.receive` method of Lua. The default value of `wanted` in `int hlua_socket_receive(struct lua_State *)` reflects this requirement, but the function fails to ensure this nonetheless: If no parameter is given the top of the Lua stack will have the index 1. `lua_pushinteger(L, wanted);` then pushes the default value onto the stack (with index 2). The following `lua_replace(L, 2);` then pops the top index (2) and tries to replace the index 2 with it. I am not sure why exactly that happens (possibly, because one cannot replace non-existent stack indicies), but this causes the stack index to be lost. `hlua_socket_receive_yield` then tries to read the stack index 2, to determine what to read and get the value `0`, instead of the correct HLSR_READ_LINE, thus taking the wrong branch. Fix this by ensuring that the top of the stack is not replaced by itself. This bug was introduced in commit 7e7ac32dad1e15c19152d37aaf9ea6b3f00a7226 (which is the very first commit adding the Socket class to Lua). This bugfix should be backported to every branch containing that commit: - 1.6 - 1.7 - 1.8 A test case for this bug is as follows: The 'Test' response header will contain an HTTP status line with the patch applied and will be empty without the patch applied. Replacing the `sock:receive()` with `sock:receive("*l")` will cause the status line to appear with and without the patch http.lua: core.register_action("bug", { "http-req" }, function(txn) local sock = core.tcp() sock:settimeout(60) sock:connect("127.0.0.1:80") sock:send("GET / HTTP/1.0\r\n\r\n") response = sock:receive() sock:close() txn:set_var("txn.foo", response) end) haproxy.cfg (bits omitted for brevity): global lua-load /scratch/haproxy/http.lua frontend fe bind 127.0.0.1:8080 http-request lua.bug http-response set-header Test %[var(txn.foo)] default_backend be backend be server s 127.0.0.1:80 (cherry picked from commit c6e377e6bb7de1fdb25510b18e4b49768aef0909) Signed-off-by: Willy Tarreau --- src/hlua.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/hlua.c b/src/hlua.c index abd096d03..285d25589 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -1869,7 +1869,10 @@ __LJMP static int hlua_socket_receive(struct lua_State *L) /* Set pattern. */ lua_pushinteger(L, wanted); - lua_replace(L, 2); + + /* Check if we would replace the top by itself. */ + if (lua_gettop(L) != 2) + lua_replace(L, 2); /* init bufffer, and fiil it wih prefix. */ luaL_buffinit(L, &socket->b); From c03d497583bbf2cf5892692b7e68f2d5302e878d Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 4 Jan 2018 19:32:14 +0100 Subject: [PATCH 05/52] DOC: lua: Fix typos in comments of hlua_socket_receive (cherry picked from commit b33754ce86cdcc877344ccedfb3a336b63154c2e) Signed-off-by: Willy Tarreau --- src/hlua.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hlua.c b/src/hlua.c index 285d25589..3b4fc3b54 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -1810,19 +1810,19 @@ connection_empty: return 0; } -/* This Lus function gets two parameters. The first one can be string - * or a number. If the string is "*l", the user require one line. If - * the string is "*a", the user require all the content of the stream. +/* This Lua function gets two parameters. The first one can be string + * or a number. If the string is "*l", the user requires one line. If + * the string is "*a", the user requires all the contents of the stream. * If the value is a number, the user require a number of bytes equal * to the value. The default value is "*l" (a line). * - * This paraeter with a variable type is converted in integer. This + * This parameter with a variable type is converted in integer. This * integer takes this values: * -1 : read a line * -2 : read all the stream - * >0 : amount if bytes. + * >0 : amount of bytes. * - * The second parameter is optinal. It contains a string that must be + * The second parameter is optional. It contains a string that must be * concatenated with the read data. */ __LJMP static int hlua_socket_receive(struct lua_State *L) @@ -1874,7 +1874,7 @@ __LJMP static int hlua_socket_receive(struct lua_State *L) if (lua_gettop(L) != 2) lua_replace(L, 2); - /* init bufffer, and fiil it wih prefix. */ + /* init buffer, and fill it with prefix. */ luaL_buffinit(L, &socket->b); /* Check prefix. */ From 9db449a701cd9e43a04f49e2e477193fa5636323 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Sat, 6 Jan 2018 19:04:45 +0100 Subject: [PATCH 06/52] BUG/MEDIUM: lua: Fix IPv6 with separate port support for Socket.connect The `socket.tcp.connect` method of Lua requires at least two parameters: The host and the port. The `Socket.connect` method of haproxy requires only one when a host with a combined port is provided. This stems from the fact that `str2sa_range` is used internally in `hlua_socket_connect`. This very fact unfortunately causes a diversion in the behaviour of Lua's socket class and haproxy's for IPv6 addresses: sock:connect("::1", "80") works fine with Lua, but fails with: connect: cannot parse destination address '::1' in haproxy, because `str2sa_range` parses the trailing `:1` as the port. This patch forcefully adds a `:` to the end of the address iff a port number greater than `0` is given as the second parameter. Technically this breaks backwards compatibility, because the docs state: > The syntax "127.0.0.1:1234" is valid. in this case, the > parameter *port* is ignored. But: The connect() call can only succeed if the second parameter is left out (which causes no breakage) or if the second parameter is an integer or a numeric string. It seems unlikely that someone would provide an address with a port number and would also provide a second parameter containing a number other than zero. Thus I feel this breakage is warranted to fix the mismatch between haproxy's socket class and Lua's one. This commit should be backported to haproxy 1.8 only, because of the possible breakage of existing Lua scripts. (cherry picked from commit 6edab865f661edf732d30232808118585cc2a1c7) Signed-off-by: Willy Tarreau --- doc/lua-api/index.rst | 2 +- src/hlua.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/doc/lua-api/index.rst b/doc/lua-api/index.rst index dceab01d7..7fe609fc9 100644 --- a/doc/lua-api/index.rst +++ b/doc/lua-api/index.rst @@ -1769,7 +1769,7 @@ Socket class "abns@", and finaly a filedescriotr can be passed with the prefix "fd@". The prefix "ipv4@", "ipv6@" and "unix@" are also supported. The port can be passed int the string. The syntax "127.0.0.1:1234" is valid. in this case, the - parameter *port* is ignored. + parameter *port* must not be set. .. js:function:: Socket.connect_ssl(socket, address, port) diff --git a/src/hlua.c b/src/hlua.c index 3b4fc3b54..3d5a81cac 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -2333,9 +2333,22 @@ __LJMP static int hlua_socket_connect(struct lua_State *L) WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread")); ip = MAY_LJMP(luaL_checkstring(L, 2)); - if (lua_gettop(L) >= 3) + if (lua_gettop(L) >= 3) { + luaL_Buffer b; port = MAY_LJMP(luaL_checkinteger(L, 3)); + /* Force the ip to end with a colon, to support IPv6 addresses + * that are not enclosed within square brackets. + */ + if (port > 0) { + luaL_buffinit(L, &b); + luaL_addstring(&b, ip); + luaL_addchar(&b, ':'); + luaL_pushresult(&b); + ip = lua_tolstring(L, lua_gettop(L), NULL); + } + } + /* check for connection break. If some data where read, return it. */ peer = xref_get_peer_and_lock(&socket->xref); if (!peer) { From c1a1297d0963f0a60e00d419fdc3c0b0bc836e60 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Sat, 6 Jan 2018 19:16:25 +0100 Subject: [PATCH 07/52] BUG/MINOR: lua: Fix return value of Socket.settimeout The `socket.tcp.settimeout` method of Lua returns `1` in all cases, while the `Socket.settimeout` method of haproxy returns `0` in all cases. This breaks the `socket.http` module, because it validates the return value of `settimeout`. This bug was introduced in commit 7e7ac32dad1e15c19152d37aaf9ea6b3f00a7226 (which is the very first commit adding the Socket class to Lua). This bugfix should be backported to every branch containing that commit: - 1.6 - 1.7 - 1.8 A test case for this bug is as follows: The 'Test' response header will contain an HTTP status code with the patch applied and will be zero (nil) without the patch applied. http.lua: http = require("socket.http") core.register_action("bug", { "http-req" }, function(txn) local b, c, h = http.request { url = "http://93.184.216.34", headers = { Host = "example.com" }, create = core.tcp, redirect = false } txn:set_var("txn.foo", c) end) haproxy.cfg: global lua-load /scratch/haproxy/http.lua frontend fe bind 127.0.0.1:8080 http-request lua.bug http-response set-header Test %[var(txn.foo)] default_backend be backend be server s example.com:80 (cherry picked from commit 119a5f10e47f3507e58116002583e1226473485d) Signed-off-by: Willy Tarreau --- src/hlua.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hlua.c b/src/hlua.c index 3d5a81cac..fa629ba94 100644 --- a/src/hlua.c +++ b/src/hlua.c @@ -2490,7 +2490,7 @@ __LJMP static int hlua_socket_settimeout(struct lua_State *L) s->res.wto = tmout; xref_unlock(&socket->xref, peer); - return 0; + return 1; } __LJMP static int hlua_socket_new(lua_State *L) From 8b803aabd3816f8bee26810eee7bc6c914b4652b Mon Sep 17 00:00:00 2001 From: Olivier Houchard Date: Mon, 8 Jan 2018 16:28:57 +0100 Subject: [PATCH 08/52] MINOR: dns: Handle SRV record weight correctly. A SRV record weight can range from 0 to 65535, while haproxy weight goes from 0 to 256, so we have to divide it by 256 before handing it to haproxy. Also, a SRV record with a weight of 0 doesn't mean the server shouldn't be used, so use a minimum weight of 1. This should probably be backported to 1.8. (cherry picked from commit 2ec2db9725fb54a76c726c5b8cc502071c575d28) Signed-off-by: Willy Tarreau --- include/types/dns.h | 2 +- src/dns.c | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/types/dns.h b/include/types/dns.h index b1f068a61..9b1d08df7 100644 --- a/include/types/dns.h +++ b/include/types/dns.h @@ -143,7 +143,7 @@ struct dns_answer_item { int16_t class; /* query class */ int32_t ttl; /* response TTL */ int16_t priority; /* SRV type priority */ - int16_t weight; /* SRV type weight */ + uint16_t weight; /* SRV type weight */ int16_t port; /* SRV type port */ int16_t data_len; /* number of bytes in target below */ struct sockaddr address; /* IPv4 or IPv6, network format */ diff --git a/src/dns.c b/src/dns.c index fceef2e48..a957710ed 100644 --- a/src/dns.c +++ b/src/dns.c @@ -522,10 +522,16 @@ static void dns_check_dns_response(struct dns_resolution *res) if (srv->srvrq == srvrq && srv->svc_port == item->port && item->data_len == srv->hostname_dn_len && !memcmp(srv->hostname_dn, item->target, item->data_len)) { - if (srv->uweight != item->weight) { + int ha_weight; + + /* Make sure weight is at least 1, so + * that the server will be used. + */ + ha_weight = item->weight / 256 + 1; + if (srv->uweight != ha_weight) { char weight[9]; - snprintf(weight, sizeof(weight), "%d", item->weight); + snprintf(weight, sizeof(weight), "%d", ha_weight); server_parse_weight_change_request(srv, weight); } HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); @@ -547,6 +553,7 @@ static void dns_check_dns_response(struct dns_resolution *res) if (srv) { const char *msg = NULL; char weight[9]; + int ha_weight; char hostname[DNS_MAX_NAME_SIZE]; if (dns_dn_label_to_str(item->target, item->data_len+1, @@ -563,7 +570,13 @@ static void dns_check_dns_response(struct dns_resolution *res) if ((srv->check.state & CHK_ST_CONFIGURED) && !(srv->flags & SRV_F_CHECKPORT)) srv->check.port = item->port; - snprintf(weight, sizeof(weight), "%d", item->weight); + + /* Make sure weight is at least 1, so + * that the server will be used. + */ + ha_weight = item->weight / 256 + 1; + + snprintf(weight, sizeof(weight), "%d", ha_weight); server_parse_weight_change_request(srv, weight); HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); } From de445a05b47e5d01756e41db1d33677999a2adc3 Mon Sep 17 00:00:00 2001 From: William Lallemand Date: Tue, 9 Jan 2018 23:12:27 +0100 Subject: [PATCH 09/52] BUG/MEDIUM: mworker: execvp failure depending on argv[0] The copy_argv() function lacks a check on '-' to remove the -x, -sf and -st parameters. When reloading a master process with a path starting by /st, /sf, or /x.. the copy_argv() function skipped argv[0] leading to an execvp() without the binary. (cherry picked from commit 29f690c94574666f0789af5254890c498011e2ed) Signed-off-by: Willy Tarreau --- src/haproxy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/haproxy.c b/src/haproxy.c index e98420e22..20b18f854 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -1242,7 +1242,8 @@ static char **copy_argv(int argc, char **argv) while (i < argc) { /* -sf or -st or -x */ - if ((argv[i][1] == 's' && (argv[i][2] == 'f' || argv[i][2] == 't')) || argv[i][1] == 'x' ) { + if (i > 0 && argv[i][0] == '-' && + ((argv[i][1] == 's' && (argv[i][2] == 'f' || argv[i][2] == 't')) || argv[i][1] == 'x' )) { /* list of pids to finish ('f') or terminate ('t') or unix socket (-x) */ i++; while (i < argc && argv[i][0] != '-') { From 74cf455e471e2d5cb98ca28ba98fe36a29e8f846 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 4 Jan 2018 18:49:31 +0100 Subject: [PATCH 10/52] MINOR: hathreads: add support for gcc < 4.7 Till now the use of __atomic_* gcc builtins required gcc >= 4.7. Since some supported and quite common operating systems like CentOS 6 still come with older versions (4.4) and the mapping to the older builtins is reasonably simple, let's implement it. This code is only used for gcc < 4.7. It has been quickly tested on a machine using gcc 4.4.4 and provided expected results. This patch should be backported to 1.8. (cherry picked from commit 1a69af6d3892fe1946bb8babb3044d2d26afd46e) Signed-off-by: Willy Tarreau --- include/common/hathreads.h | 54 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index 9782ca9a0..503abbec3 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -99,6 +99,58 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa /* TODO: thread: For now, we rely on GCC builtins but it could be a good idea to * have a header file regrouping all functions dealing with threads. */ + +#if defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 7) +/* gcc < 4.7 */ + +#define HA_ATOMIC_ADD(val, i) __sync_add_and_fetch(val, i) +#define HA_ATOMIC_SUB(val, i) __sync_sub_and_fetch(val, i) +#define HA_ATOMIC_AND(val, flags) __sync_and_and_fetch(val, flags) +#define HA_ATOMIC_OR(val, flags) __sync_or_and_fetch(val, flags) + +/* the CAS is a bit complicated. The older API doesn't support returning the + * value and the swap's result at the same time. So here we take what looks + * like the safest route, consisting in using the boolean version guaranteeing + * that the operation was performed or not, and we snoop a previous value. If + * the compare succeeds, we return. If it fails, we return the previous value, + * but only if it differs from the expected one. If it's the same it's a race + * thus we try again to avoid confusing a possibly sensitive caller. + */ +#define HA_ATOMIC_CAS(val, old, new) \ + ({ \ + typeof((val)) __val = (val); \ + typeof((old)) __oldp = (old); \ + typeof(*(old)) __oldv; \ + typeof((new)) __new = (new); \ + int __ret; \ + do { \ + __oldv = *__val; \ + __ret = __sync_bool_compare_and_swap(__val, *__oldp, __new); \ + } while (!__ret && *__oldp == __oldv); \ + if (!__ret) \ + *__oldp = __oldv; \ + __ret; \ + }) + +#define HA_ATOMIC_XCHG(val, new) \ + ({ \ + typeof((val)) __val = (val); \ + typeof(*(val)) __old; \ + typeof((new)) __new = (new); \ + do { __old = *__val; \ + } while (!__sync_bool_compare_and_swap(__val, __old, __new)); \ + __old; \ + }) +#define HA_ATOMIC_STORE(val, new) \ + ({ \ + typeof((val)) __val = (val); \ + typeof(*(val)) __old; \ + typeof((new)) __new = (new); \ + do { __old = *__val; \ + } while (!__sync_bool_compare_and_swap(__val, __old, __new)); \ + }) +#else +/* gcc >= 4.7 */ #define HA_ATOMIC_CAS(val, old, new) __atomic_compare_exchange_n(val, old, new, 0, 0, 0) #define HA_ATOMIC_ADD(val, i) __atomic_add_fetch(val, i, 0) #define HA_ATOMIC_SUB(val, i) __atomic_sub_fetch(val, i, 0) @@ -106,6 +158,8 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa #define HA_ATOMIC_OR(val, flags) __atomic_or_fetch(val, flags, 0) #define HA_ATOMIC_XCHG(val, new) __atomic_exchange_n(val, new, 0) #define HA_ATOMIC_STORE(val, new) __atomic_store_n(val, new, 0) +#endif + #define HA_ATOMIC_UPDATE_MAX(val, new) \ ({ \ typeof(*(val)) __old = *(val); \ From a49366cebf074a247c1da642b472e6623a84c6c2 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 11 Jan 2018 14:20:43 +0000 Subject: [PATCH 11/52] BUILD/MINOR: ancient gcc versions atomic fix Commit 1a69af6d3892fe1946bb8babb3044d2d26afd46e introduced code for atomic prior to 4.7. Unfortunately clang uses as well those constants which is misleading. (cherry picked from commit ec5e84552a0a2e0767431731c4f40d5627dc7bdd) Signed-off-by: Willy Tarreau --- include/common/hathreads.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index 503abbec3..5f0b96954 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -100,7 +100,7 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa /* TODO: thread: For now, we rely on GCC builtins but it could be a good idea to * have a header file regrouping all functions dealing with threads. */ -#if defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 7) +#if defined(__GNUC__) && (__GNUC__ < 4 || __GNUC__ == 4 && __GNUC_MINOR__ < 7) && !defined(__clang__) /* gcc < 4.7 */ #define HA_ATOMIC_ADD(val, i) __sync_add_and_fetch(val, i) From 72a0acaa4c5e8c4c09bd129ecac2352c8f6ed701 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 12 Jan 2018 10:42:12 +0100 Subject: [PATCH 12/52] BUG/MEDIUM: stream: properly handle client aborts during redispatch James Mc Bride reported an interesting case affecting all versions since at least 1.5 : if a client aborts a connection on an empty buffer at the exact moment a server redispatch happens, the CF_SHUTW_NOW flag on the channel is immediately turned into CF_SHUTW, which is not caught by check_req_may_abort(), leading the redispatch to be performed anyway with the channel marked as shut in both directions while the stream interface correctly establishes. This situation makes no sense. Ultimately the transfer times out and the server-side stream interface remains in EST state while the client is in CLO state, and this case doesn't correspond to anything we can handle in process_stream, leading to poll() being woken up all the time without any progress being made. And the session cannot even be killed from the CLI. So we must ensure that check_req_may_abort() also considers the case where the channel is already closed, which is what this patch does. Thanks to James for providing detailed captures allowing to diagnose the problem. This fix must be backported to all maintained versions. (cherry picked from commit d651ba14d44a7350b4506e9de7c7b44cc18a6bff) Signed-off-by: Willy Tarreau --- src/stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stream.c b/src/stream.c index 39ee9ba55..60d3eff44 100644 --- a/src/stream.c +++ b/src/stream.c @@ -833,7 +833,7 @@ static void sess_establish(struct stream *s) static int check_req_may_abort(struct channel *req, struct stream *s) { return ((req->flags & (CF_READ_ERROR)) || - ((req->flags & CF_SHUTW_NOW) && /* empty and client aborted */ + ((req->flags & (CF_SHUTW_NOW|CF_SHUTW)) && /* empty and client aborted */ (channel_is_empty(req) || ((s->be->options & PR_O_ABRT_CLOSE) && !(s->si[0].flags & SI_FL_CLEAN_ABRT))))); } From a9430f93ab02a759c5dd490056247a9b872b9e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=E9r=F4me=20Magnin?= Date: Mon, 15 Jan 2018 14:01:17 +0100 Subject: [PATCH 13/52] DOC: clarify the scope of ssl_fc_is_resumed Clarify that it's for incoming connections. (cherry picked from commit 4a326cba5b6720c95199e7f919f69a1ae682efb2) Signed-off-by: Willy Tarreau --- doc/configuration.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 5e70ab2e8..208ed523a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -14503,7 +14503,8 @@ ssl_fc_has_sni : boolean ssl_fc_is_resumed : boolean Returns true if the SSL/TLS session has been resumed through the use of - SSL session cache or TLS tickets. + SSL session cache or TLS tickets on an incoming connection over an SSL/TLS + transport layer. ssl_fc_npn : string This extracts the Next Protocol Negotiation field from an incoming connection From e80014590f274df2edd3e0532c8a2b7570c7986d Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 15 Jan 2018 18:59:16 +0100 Subject: [PATCH 14/52] CONTRIB: debug: fix a few flags definitions Commit f4cfcf9 ("MINOR: debug/flags: Add missing flags") added a number of missing flags but a few of them were incorrect, hiding real values. This can be backported to 1.8. (cherry picked from commit 260bf5c1064e4d2da61b4c73f69b7716cc8eb7ed) Signed-off-by: Willy Tarreau --- contrib/debug/flags.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/contrib/debug/flags.c b/contrib/debug/flags.c index db601aa64..558072f26 100644 --- a/contrib/debug/flags.c +++ b/contrib/debug/flags.c @@ -43,8 +43,8 @@ void show_chn_ana(unsigned int f) SHOW_FLAG(f, AN_REQ_FLT_XFER_DATA); SHOW_FLAG(f, AN_REQ_FLT_END); - SHOW_FLAG(f, AN_REQ_FLT_START_FE); - SHOW_FLAG(f, AN_REQ_FLT_START_BE); + SHOW_FLAG(f, AN_RES_FLT_START_FE); + SHOW_FLAG(f, AN_RES_FLT_START_BE); SHOW_FLAG(f, AN_RES_INSPECT); SHOW_FLAG(f, AN_RES_WAIT_HTTP); SHOW_FLAG(f, AN_RES_STORE_RULES); @@ -127,8 +127,6 @@ void show_conn_flags(unsigned int f) SHOW_FLAG(f, CO_FL_WAIT_L6_CONN); SHOW_FLAG(f, CO_FL_WAIT_L4_CONN); SHOW_FLAG(f, CO_FL_CONNECTED); - SHOW_FLAG(f, CO_FL_NOTIFY_DATA); - SHOW_FLAG(f, CO_FL_NOTIFY_DONE); SHOW_FLAG(f, CO_FL_ERROR); SHOW_FLAG(f, CO_FL_SOCK_WR_SH); SHOW_FLAG(f, CO_FL_SOCK_RD_SH); From 13165eed6abb8569d56f6eec404f4462d0cd7115 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 17 Jan 2018 15:48:53 +0100 Subject: [PATCH 15/52] BUG/MINOR: poll: too large size allocation for FD events Commit 80da05a ("MEDIUM: poll: do not use FD_* macros anymore") which appeared in 1.5-dev18 and which was backported to 1.4.23 made explicit use of arrays of FDs mapped to unsigned ints. The problem lies in the allocated size for poll(), as the resulting size is in bits and not bytes, resulting in poll() arrays being 8 times larger than necessary! In practice poll() is not used on highly loaded systems, explaining why nobody noticed. But it definetely has to be addressed. This fix needs to be backported to all stable versions. (cherry picked from commit cc35923c329bbadc78c1c026ce4e45e115852abe) Signed-off-by: Willy Tarreau --- src/ev_poll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ev_poll.c b/src/ev_poll.c index f9e445113..610509bd6 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -205,7 +205,7 @@ REGPRM1 static int _do_init(struct poller *p) int fd_evts_bytes; p->private = NULL; - fd_evts_bytes = (global.maxsock + sizeof(**fd_evts) - 1) / sizeof(**fd_evts) * sizeof(**fd_evts); + fd_evts_bytes = (global.maxsock + sizeof(**fd_evts) * 8 - 1) / (sizeof(**fd_evts) * 8) * sizeof(**fd_evts); if ((fd_evts[DIR_RD] = calloc(1, fd_evts_bytes)) == NULL) goto fail_srevt; From 07325f084d469d8e8d38d1e5a80f1d82bb641871 Mon Sep 17 00:00:00 2001 From: Emeric Brun Date: Mon, 22 Jan 2018 15:10:08 +0100 Subject: [PATCH 16/52] BUG/MEDIUM: peers: fix expire date wasn't updated if entry is modified remotely. The stktable_touch_remote considers the expire field stored in the stksess struct. The expire field was updated on the a newly created stksess to store. But if the stksess with a same key is still present the expire was not updated. This patch postpones the update of the expire field of the stksess just before processing the "touch". These bug was introduced in commit: MEDIUM: threads/stick-tables: handle multithreads on stick tables. And the fix should be backported on 1.8. (cherry picked from commit 55482913956581223dc9b54e480675610e2e177e) Signed-off-by: Willy Tarreau --- src/peers.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/peers.c b/src/peers.c index 988cd71e9..c56ed3afc 100644 --- a/src/peers.c +++ b/src/peers.c @@ -1195,9 +1195,7 @@ switchstate: newts = stksess_new(st->table, NULL); if (!newts) goto ignore_msg; - /* Force expiratiion to remote date - in case of first insert */ - newts->expire = tick_add(now_ms, expire); + if (st->table->type == SMP_T_STR) { unsigned int to_read, to_store; @@ -1351,6 +1349,9 @@ switchstate: } } + /* Force new expiration */ + ts->expire = tick_add(now_ms, expire); + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); stktable_touch_remote(st->table, ts, 1); From eab977ce47868d17848bfde828c303c7e515abb6 Mon Sep 17 00:00:00 2001 From: Olivier Houchard Date: Wed, 17 Jan 2018 17:39:34 +0100 Subject: [PATCH 17/52] MINOR: servers: Don't report duplicate dyncookies for disabled servers. Especially with server-templates, it can happen servers starts with a placeholder IP, in the disabled state. In this case, we don't want to report that the same cookie was generated for multiple servers. So defer the test until the server is enabled. This should be backported to 1.8. (cherry picked from commit e9bad0a9361a761f2f304dcca8ddd997c05627c2) Signed-off-by: Willy Tarreau --- src/server.c | 50 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/src/server.c b/src/server.c index a37e91968..3901e7d8b 100644 --- a/src/server.c +++ b/src/server.c @@ -86,10 +86,34 @@ int srv_getinter(const struct check *check) return (check->fastinter)?(check->fastinter):(check->inter); } -void srv_set_dyncookie(struct server *s) +/* + * Check that we did not get a hash collision. + * Unlikely, but it can happen. + */ +static inline void srv_check_for_dup_dyncookie(struct server *s) { struct proxy *p = s->proxy; struct server *tmpserv; + + for (tmpserv = p->srv; tmpserv != NULL; + tmpserv = tmpserv->next) { + if (tmpserv == s) + continue; + if (tmpserv->next_admin & SRV_ADMF_FMAINT) + continue; + if (tmpserv->cookie && + strcmp(tmpserv->cookie, s->cookie) == 0) { + ha_warning("We generated two equal cookies for two different servers.\n" + "Please change the secret key for '%s'.\n", + s->proxy->id); + } + } + +} + +void srv_set_dyncookie(struct server *s) +{ + struct proxy *p = s->proxy; char *tmpbuf; unsigned long long hash_value; size_t key_len; @@ -136,21 +160,13 @@ void srv_set_dyncookie(struct server *s) if (!s->cookie) return; s->cklen = 16; - /* - * Check that we did not get a hash collision. - * Unlikely, but it can happen. + + /* Don't bother checking if the dyncookie is duplicated if + * the server is marked as "disabled", maybe it doesn't have + * its real IP yet, but just a place holder. */ - for (tmpserv = p->srv; tmpserv != NULL; - tmpserv = tmpserv->next) { - if (tmpserv == s) - continue; - if (tmpserv->cookie && - strcmp(tmpserv->cookie, s->cookie) == 0) { - ha_warning("We generated two equal cookies for two different servers.\n" - "Please change the secret key for '%s'.\n", - s->proxy->id); - } - } + if (!(s->next_admin & SRV_ADMF_FMAINT)) + srv_check_for_dup_dyncookie(s); } /* @@ -4398,6 +4414,10 @@ static int cli_parse_enable_server(char **args, struct appctx *appctx, void *pri return 1; srv_adm_set_ready(sv); + if (!(sv->flags & SRV_F_COOKIESET) + && (sv->proxy->ck_opts & PR_CK_DYNAMIC) && + sv->cookie) + srv_check_for_dup_dyncookie(sv); return 1; } From 7b7e3077e91cf82551c17a4912d0ed85daea5f13 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 20 Jan 2018 18:12:15 +0100 Subject: [PATCH 18/52] MINOR: global/threads: move cpu_map at the end of the global struct The "thread" part is 32kB long, better move it at the end of the structure since it's only used during initialization, to keep the rest grouped together. Should be backported to 1.8 to ease backporting of upcoming patches, no functional impact. (cherry picked from commit f4571a027f20a3866018a9a6749fd456370943e1) Signed-off-by: Willy Tarreau --- include/types/global.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/types/global.h b/include/types/global.h index 1f332074b..6f3fedc33 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -163,14 +163,14 @@ struct global { mode_t mode; /* 0 to leave unchanged */ } ux; } unix_bind; + struct proxy *stats_fe; /* the frontend holding the stats settings */ + struct vars vars; /* list of variables for the process scope. */ #ifdef USE_CPU_AFFINITY struct { unsigned long proc[LONGBITS]; /* list of CPU masks for the 32/64 first processes */ unsigned long thread[LONGBITS][LONGBITS]; /* list of CPU masks for the 32/64 first threads per process */ } cpu_map; #endif - struct proxy *stats_fe; /* the frontend holding the stats settings */ - struct vars vars; /* list of variables for the process scope. */ }; extern struct global global; From 00065af94c9d726c954a32183036deabc7911517 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 20 Jan 2018 18:19:22 +0100 Subject: [PATCH 19/52] MINOR: threads: add a MAX_THREADS define instead of LONGBITS This one allows not to inflate some structures when threads are disabled. Now struct global is 1.4 kB instead of 33 kB. Should be backported to 1.8 for ease of backporting of upcoming patches. (cherry picked from commit 421f02e738999dec9f52665023918e22580197fd) Signed-off-by: Willy Tarreau --- include/common/hathreads.h | 4 ++++ include/types/global.h | 2 +- src/cfgparse.c | 14 +++++++------- src/haproxy.c | 2 +- src/listener.c | 2 +- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index 5f0b96954..f81953686 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -30,6 +30,8 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa #ifndef USE_THREAD +#define MAX_THREADS 1 + #define __decl_hathreads(decl) #define HA_ATOMIC_CAS(val, old, new) ({((*val) == (*old)) ? (*(val) = (new) , 1) : (*(old) = *(val), 0);}) @@ -95,6 +97,8 @@ extern THREAD_LOCAL unsigned long tid_bit; /* The bit corresponding to the threa #include #include +#define MAX_THREADS LONGBITS + #define __decl_hathreads(decl) decl /* TODO: thread: For now, we rely on GCC builtins but it could be a good idea to diff --git a/include/types/global.h b/include/types/global.h index 6f3fedc33..5c5cf732e 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -168,7 +168,7 @@ struct global { #ifdef USE_CPU_AFFINITY struct { unsigned long proc[LONGBITS]; /* list of CPU masks for the 32/64 first processes */ - unsigned long thread[LONGBITS][LONGBITS]; /* list of CPU masks for the 32/64 first threads per process */ + unsigned long thread[LONGBITS][MAX_THREADS]; /* list of CPU masks for the 32/64 first threads per process */ } cpu_map; #endif }; diff --git a/src/cfgparse.c b/src/cfgparse.c index efd56c466..567f3acbf 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -1155,12 +1155,6 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) goto out; } global.nbthread = atol(args[1]); - if (global.nbthread < 1 || global.nbthread > LONGBITS) { - ha_alert("parsing [%s:%d] : '%s' must be between 1 and %d (was %d).\n", - file, linenum, args[0], LONGBITS, global.nbthread); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } #ifndef USE_THREAD if (global.nbthread > 1) { ha_alert("HAProxy is not compiled with threads support, please check build options for USE_THREAD.\n"); @@ -1169,6 +1163,12 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) goto out; } #endif + if (global.nbthread < 1 || global.nbthread > MAX_THREADS) { + ha_alert("parsing [%s:%d] : '%s' must be between 1 and %d (was %d).\n", + file, linenum, args[0], MAX_THREADS, global.nbthread); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } } else if (!strcmp(args[0], "maxconn")) { if (alertif_too_many_args(1, file, linenum, args, &err_code)) @@ -1781,7 +1781,7 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) } /* Mapping at the thread level */ - for (j = 0; j < LONGBITS; j++) { + for (j = 0; j < MAX_THREADS; j++) { /* Np mapping for this thread */ if (!(thread & (1UL << j))) continue; diff --git a/src/haproxy.c b/src/haproxy.c index 20b18f854..a8d0fad87 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2986,7 +2986,7 @@ int main(int argc, char **argv) if (global.cpu_map.proc[relative_pid-1]) global.cpu_map.thread[relative_pid-1][i] &= global.cpu_map.proc[relative_pid-1]; - if (i < LONGBITS && /* only the first 32/64 threads may be pinned */ + if (i < MAX_THREADS && /* only the first 32/64 threads may be pinned */ global.cpu_map.thread[relative_pid-1][i]) {/* only do this if the thread has a THREAD map */ #if defined(__FreeBSD__) || defined(__NetBSD__) cpuset_t cpuset; diff --git a/src/listener.c b/src/listener.c index dfd36cca3..fd7072684 100644 --- a/src/listener.c +++ b/src/listener.c @@ -963,7 +963,7 @@ static int bind_parse_process(char **args, int cur_arg, struct proxy *px, struct conf->bind_proc |= proc; if (thread) { - for (i = 0; i < LONGBITS; i++) + for (i = 0; i < MAX_THREADS; i++) if (!proc || (proc & (1UL << i))) conf->bind_thread[i] |= thread; } From f3703dd53d5283ae2c0bc5d99c3569e24b7d996b Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 20 Jan 2018 19:30:13 +0100 Subject: [PATCH 20/52] MINOR: global: add some global activity counters to help debugging A number of counters have been added at special places helping better understanding certain bug reports. These counters are maintained per thread and are shown using "show activity" on the CLI. The "clear counters" commands also reset these counters. The output is sent as a single write(), which currently produces up to about 7 kB of data for 64 threads. If more counters are added, it may be necessary to write into multiple buffers, or to reset the counters. To backport to 1.8 to help collect more detailed bug reports. (cherry picked from commit d80cb4ee1386cb5853170371d11e41284739e9d4) Signed-off-by: Willy Tarreau --- doc/management.txt | 15 ++++++++++++- include/types/global.h | 27 ++++++++++++++++++++++++ src/cli.c | 48 ++++++++++++++++++++++++++++++++++++++++++ src/connection.c | 4 +++- src/ev_epoll.c | 17 ++++++++++++--- src/ev_kqueue.c | 15 +++++++++++-- src/ev_poll.c | 28 ++++++++++++++++++------ src/ev_select.c | 18 +++++++++++----- src/fd.c | 12 ++++++++--- src/haproxy.c | 21 +++++++++++++----- src/stats.c | 2 ++ src/stream.c | 2 ++ src/task.c | 7 +++++- 13 files changed, 188 insertions(+), 28 deletions(-) diff --git a/doc/management.txt b/doc/management.txt index 280ab1792..8cec7ea15 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -1321,7 +1321,8 @@ add map clear counters Clear the max values of the statistics counters in each proxy (frontend & - backend) and in each server. The accumulated counters are not affected. This + backend) and in each server. The accumulated counters are not affected. The + internal activity counters reported by "show activity" are also reset. This can be used to get clean counters after an incident, without having to restart nor to clear traffic counters. This command is restricted and can only be issued on sockets configured for levels "operator" or "admin". @@ -1861,6 +1862,18 @@ show fd [] that the output format may evolve over time so this output must not be parsed by tools designed to be durable. +show activity + Reports some counters about internal events that will help developers and + more generally people who know haproxy well enough to narrow down the causes + of reports of abnormal behaviours. A typical example would be a properly + running process never sleeping and eating 100% of the CPU. The output fields + will be made of one line per metric, and per-thread counters on the same + line. These counters are 32-bit and will wrap during the process' life, which + is not a problem since calls to this command will typically be performed + twice. The fields are purposely not documented so that their exact meaning is + verified in the code where the counters are fed. These values are also reset + by the "clear counters" command. + show info [typed|json] Dump info about haproxy status on current process. If "typed" is passed as an optional argument, field numbers, names and types are emitted as well so that diff --git a/include/types/global.h b/include/types/global.h index 5c5cf732e..bd7761cd6 100644 --- a/include/types/global.h +++ b/include/types/global.h @@ -173,7 +173,34 @@ struct global { #endif }; +/* per-thread activity reports. It's important that it's aligned on cache lines + * because some elements will be updated very often. Most counters are OK on + * 32-bit since this will be used during debugging sessions for troubleshooting + * in iterative mode. + */ +struct activity { + unsigned int loops; // complete loops in run_poll_loop() + unsigned int wake_cache; // active fd_cache prevented poll() from sleeping + unsigned int wake_tasks; // active tasks prevented poll() from sleeping + unsigned int wake_applets; // active applets prevented poll() from sleeping + unsigned int wake_signal; // pending signal prevented poll() from sleeping + unsigned int poll_exp; // number of times poll() sees an expired timeout (includes wake_*) + unsigned int poll_drop; // poller dropped a dead FD from the update list + unsigned int poll_dead; // poller woke up with a dead FD + unsigned int poll_skip; // poller skipped another thread's FD + unsigned int fd_skip; // fd cache skipped another thread's FD + unsigned int fd_lock; // fd cache skipped a locked FD + unsigned int fd_del; // fd cache detected a deleted FD + unsigned int conn_dead; // conn_fd_handler woke up on an FD indicating a dead connection + unsigned int stream; // calls to process_stream() + unsigned int empty_rq; // calls to process_runnable_tasks() with nothing for the thread + unsigned int long_rq; // process_runnable_tasks() left with tasks in the run queue + char __pad[0]; // unused except to check remaining room + char __end[0] __attribute__((aligned(64))); // align size to 64. +}; + extern struct global global; +extern struct activity activity[MAX_THREADS]; extern int pid; /* current process id */ extern int relative_pid; /* process id starting at 1 */ extern unsigned long pid_bit; /* bit corresponding to the process id */ diff --git a/src/cli.c b/src/cli.c index 149ecd7c8..3e62c311b 100644 --- a/src/cli.c +++ b/src/cli.c @@ -855,6 +855,53 @@ static int cli_io_handler_show_fd(struct appctx *appctx) return 1; } +/* This function dumps some activity counters used by developers and support to + * rule out some hypothesis during bug reports. It returns 0 if the output + * buffer is full and it needs to be called again, otherwise non-zero. It dumps + * everything at once in the buffer and is not designed to do it in multiple + * passes. + */ +static int cli_io_handler_show_activity(struct appctx *appctx) +{ + struct stream_interface *si = appctx->owner; + int thr; + + if (unlikely(si_ic(si)->flags & (CF_WRITE_ERROR|CF_SHUTW))) + return 1; + + chunk_reset(&trash); + + chunk_appendf(&trash, "thread_id: %u", tid); + chunk_appendf(&trash, "\ndate_now: %lu.%06lu", (long)now.tv_sec, (long)now.tv_usec); + chunk_appendf(&trash, "\nloops:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].loops); + chunk_appendf(&trash, "\nwake_cache:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_cache); + chunk_appendf(&trash, "\nwake_tasks:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_tasks); + chunk_appendf(&trash, "\nwake_applets:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_applets); + chunk_appendf(&trash, "\nwake_signal:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].wake_signal); + chunk_appendf(&trash, "\npoll_exp:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_exp); + chunk_appendf(&trash, "\npoll_drop:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_drop); + chunk_appendf(&trash, "\npoll_dead:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_dead); + chunk_appendf(&trash, "\npoll_skip:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].poll_skip); + chunk_appendf(&trash, "\nfd_skip:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_skip); + chunk_appendf(&trash, "\nfd_lock:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_lock); + chunk_appendf(&trash, "\nfd_del:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].fd_del); + chunk_appendf(&trash, "\nconn_dead:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].conn_dead); + chunk_appendf(&trash, "\nstream:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].stream); + chunk_appendf(&trash, "\nempty_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].empty_rq); + chunk_appendf(&trash, "\nlong_rq:"); for (thr = 0; thr < global.nbthread; thr++) chunk_appendf(&trash, " %u", activity[thr].long_rq); + + chunk_appendf(&trash, "\n"); + + if (ci_putchk(si_ic(si), &trash) == -1) { + chunk_reset(&trash); + chunk_printf(&trash, "[output too large, cannot dump]\n"); + si_applet_cant_put(si); + } + + /* dump complete */ + return 1; +} + /* * CLI IO handler for `show cli sockets`. * Uses ctx.cli.p0 to store the restart pointer. @@ -1428,6 +1475,7 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "env", NULL }, "show env [var] : dump environment variables known to the process", cli_parse_show_env, cli_io_handler_show_env, NULL }, { { "show", "cli", "sockets", NULL }, "show cli sockets : dump list of cli sockets", cli_parse_default, cli_io_handler_show_cli_sock, NULL }, { { "show", "fd", NULL }, "show fd [num] : dump list of file descriptors in use", cli_parse_show_fd, cli_io_handler_show_fd, NULL }, + { { "show", "activity", NULL }, "show activity : show per-thread activity stats (for support/developers)", cli_parse_default, cli_io_handler_show_activity, NULL }, { { "_getsocks", NULL }, NULL, _getsocks, NULL }, {{},} }}; diff --git a/src/connection.c b/src/connection.c index 0f8acb02d..48d7a64de 100644 --- a/src/connection.c +++ b/src/connection.c @@ -63,8 +63,10 @@ void conn_fd_handler(int fd) struct connection *conn = fdtab[fd].owner; unsigned int flags; - if (unlikely(!conn)) + if (unlikely(!conn)) { + activity[tid].conn_dead++; return; + } conn_refresh_polling_flags(conn); conn->flags |= CO_FL_WILL_UPDATE; diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 602a243d7..679dfee4d 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -68,8 +68,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -114,8 +116,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* compute the epoll_wait() timeout */ if (!exp) wait_time = MAX_DELAY_MS; - else if (tick_is_expired(exp, now_ms)) + else if (tick_is_expired(exp, now_ms)) { + activity[tid].poll_exp++; wait_time = 0; + } else { wait_time = TICKS_TO_MS(tick_remain(now_ms, exp)) + 1; if (wait_time > MAX_DELAY_MS) @@ -136,8 +140,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) unsigned int e = epoll_events[count].events; fd = epoll_events[count].data.fd; - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } /* it looks complicated but gcc can optimize it away when constants * have same values... In fact it depends on gcc :-( diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index b42ee3dd0..69d51b6b0 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -47,8 +47,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -106,6 +108,8 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) timeout.tv_sec = (delta_ms / 1000); timeout.tv_nsec = (delta_ms % 1000) * 1000000; } + else + activity[tid].poll_exp++; fd = MIN(maxfd, global.tune.maxpollevents); gettimeofday(&before_poll, NULL); @@ -122,8 +126,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) unsigned int n = 0; fd = kev[count].ident; - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } if (kev[count].filter == EVFILT_READ) { if (kev[count].data) diff --git a/src/ev_poll.c b/src/ev_poll.c index 610509bd6..efd56ee19 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -73,8 +73,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -111,13 +113,21 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) continue; for (count = 0, fd = fds * 8*sizeof(**fd_evts); count < 8*sizeof(**fd_evts) && fd < maxfd; count++, fd++) { - - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) - continue; - sr = (rn >> count) & 1; sw = (wn >> count) & 1; if ((sr|sw)) { + if (!fdtab[fd].owner) { + /* should normally not happen here except + * due to rare thread concurrency + */ + continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } + poll_events[nbfd].fd = fd; poll_events[nbfd].events = (sr ? (POLLIN | POLLRDHUP) : 0) | (sw ? POLLOUT : 0); nbfd++; @@ -128,8 +138,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* now let's wait for events */ if (!exp) wait_time = MAX_DELAY_MS; - else if (tick_is_expired(exp, now_ms)) + else if (tick_is_expired(exp, now_ms)) { + activity[tid].poll_exp++; wait_time = 0; + } else { wait_time = TICKS_TO_MS(tick_remain(now_ms, exp)) + 1; if (wait_time > MAX_DELAY_MS) @@ -152,8 +164,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* ok, we found one active fd */ status--; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } /* it looks complicated but gcc can optimize it away when constants * have same values... In fact it depends on gcc :-( diff --git a/src/ev_select.c b/src/ev_select.c index b2b4e5035..52c445473 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -55,8 +55,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) { fd = fd_updt[updt_idx]; - if (!fdtab[fd].owner) + if (!fdtab[fd].owner) { + activity[tid].poll_drop++; continue; + } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].updated = 0; @@ -117,6 +119,8 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) delta.tv_sec = (delta_ms / 1000); delta.tv_usec = (delta_ms % 1000) * 1000; } + else + activity[tid].poll_exp++; gettimeofday(&before_poll, NULL); status = select(maxfd, @@ -138,11 +142,15 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) for (count = BITS_PER_INT, fd = fds * BITS_PER_INT; count && fd < maxfd; count--, fd++) { unsigned int n = 0; - /* if we specify read first, the accepts and zero reads will be - * seen first. Moreover, system buffers will be flushed faster. - */ - if (!fdtab[fd].owner || !(fdtab[fd].thread_mask & tid_bit)) + if (!fdtab[fd].owner) { + activity[tid].poll_dead++; continue; + } + + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].poll_skip++; + continue; + } if (FD_ISSET(fd, tmp_evts[DIR_RD])) n |= FD_POLL_IN; diff --git a/src/fd.c b/src/fd.c index 9fb09ab71..148b4d27e 100644 --- a/src/fd.c +++ b/src/fd.c @@ -243,10 +243,14 @@ void fd_process_cached_events() for (entry = 0; entry < fd_cache_num; ) { fd = fd_cache[entry]; - if (!(fdtab[fd].thread_mask & tid_bit)) + if (!(fdtab[fd].thread_mask & tid_bit)) { + activity[tid].fd_skip++; goto next; - if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) + } + if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) { + activity[tid].fd_lock++; goto next; + } HA_RWLOCK_RDUNLOCK(FDCACHE_LOCK, &fdcache_lock); @@ -272,8 +276,10 @@ void fd_process_cached_events() /* If the fd was removed from the cache, it has been * replaced by the next one that we don't want to skip ! */ - if (entry < fd_cache_num && fd_cache[entry] != fd) + if (entry < fd_cache_num && fd_cache[entry] != fd) { + activity[tid].fd_del++; continue; + } next: entry++; } diff --git a/src/haproxy.c b/src/haproxy.c index a8d0fad87..952733ee3 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -165,6 +165,8 @@ struct global global = { /* others NULL OK */ }; +struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { }; + /*********************************************************************/ int stopping; /* non zero means stopping in progress */ @@ -2371,7 +2373,7 @@ static void sync_poll_loop() /* Runs the polling loop */ static void run_poll_loop() { - int next; + int next, exp; tv_update_date(0,1); while (1) { @@ -2389,18 +2391,27 @@ static void run_poll_loop() break; /* expire immediately if events are pending */ - if (fd_cache_num || (active_tasks_mask & tid_bit) || signal_queue_len || (active_applets_mask & tid_bit)) - next = now_ms; + exp = now_ms; + if (fd_cache_num) + activity[tid].wake_cache++; + else if (active_tasks_mask & tid_bit) + activity[tid].wake_tasks++; + else if (active_applets_mask & tid_bit) + activity[tid].wake_applets++; + else if (signal_queue_len) + activity[tid].wake_signal++; + else + exp = next; /* The poller will ensure it returns around */ - cur_poller.poll(&cur_poller, next); + cur_poller.poll(&cur_poller, exp); fd_process_cached_events(); applet_run_active(); /* Synchronize all polling loops */ sync_poll_loop(); - + activity[tid].loops++; } } diff --git a/src/stats.c b/src/stats.c index 01259282d..61e054981 100644 --- a/src/stats.c +++ b/src/stats.c @@ -3578,6 +3578,8 @@ static int cli_parse_clear_counters(char **args, struct appctx *appctx, void *pr global.ssl_max = 0; global.ssl_fe_keys_max = 0; global.ssl_be_keys_max = 0; + + memset(activity, 0, sizeof(activity)); return 1; } diff --git a/src/stream.c b/src/stream.c index 60d3eff44..ebe41be19 100644 --- a/src/stream.c +++ b/src/stream.c @@ -1627,6 +1627,8 @@ struct task *process_stream(struct task *t) struct channel *req, *res; struct stream_interface *si_f, *si_b; + activity[tid].stream++; + req = &s->req; res = &s->res; diff --git a/src/task.c b/src/task.c index 053376c39..fd9acf66d 100644 --- a/src/task.c +++ b/src/task.c @@ -196,8 +196,10 @@ void process_runnable_tasks() max_processed = 200; if (unlikely(global.nbthread <= 1)) { /* when no lock is needed, this loop is much faster */ - if (!(active_tasks_mask & tid_bit)) + if (!(active_tasks_mask & tid_bit)) { + activity[tid].empty_rq++; return; + } active_tasks_mask &= ~tid_bit; rq_next = eb32sc_lookup_ge(&rqueue, rqueue_ticks - TIMER_LOOK_BACK, tid_bit); @@ -245,6 +247,7 @@ void process_runnable_tasks() max_processed--; if (max_processed <= 0) { active_tasks_mask |= tid_bit; + activity[tid].long_rq++; break; } } @@ -254,6 +257,7 @@ void process_runnable_tasks() HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock); if (!(active_tasks_mask & tid_bit)) { HA_SPIN_UNLOCK(TASK_RQ_LOCK, &rq_lock); + activity[tid].empty_rq++; return; } @@ -335,6 +339,7 @@ void process_runnable_tasks() HA_SPIN_LOCK(TASK_RQ_LOCK, &rq_lock); if (max_processed <= 0) { active_tasks_mask |= tid_bit; + activity[tid].long_rq++; break; } } From 5e2865841a7f0e09ac3692f837253186e039d349 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Mon, 15 Jan 2018 11:57:03 +0100 Subject: [PATCH 21/52] MINOR: threads/fd: Use a bitfield to know if there are FDs for a thread in the FD cache A bitfield has been added to know if there are some FDs processable by a specific thread in the FD cache. When a FD is inserted in the FD cache, the bits corresponding to its thread_mask are set. On each thread, the bitfield is updated when the FD cache is processed. If there is no FD processed, the thread is removed from the bitfield by unsetting its tid_bit. Note that this bitfield is updated but not checked in fd_process_cached_events. So, when this function is called, the FDs cache is always processed. [wt: should be backported to 1.8 as it will help fix a design limitation] (cherry picked from commit 69553fe62c5c69753d1862a3e74740a1ff6c4d8d) Signed-off-by: Willy Tarreau --- include/proto/fd.h | 2 ++ src/fd.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/proto/fd.h b/include/proto/fd.h index 8cc191f75..44370e768 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -35,6 +35,7 @@ extern unsigned int *fd_cache; // FD events cache extern int fd_cache_num; // number of events in the cache +extern unsigned long fd_cache_mask; // Mask of threads with events in the cache extern THREAD_LOCAL int *fd_updt; // FD updates list extern THREAD_LOCAL int fd_nbupdt; // number of updates in the list @@ -115,6 +116,7 @@ static inline void fd_alloc_cache_entry(const int fd) if (fdtab[fd].cache) goto end; fd_cache_num++; + fd_cache_mask |= fdtab[fd].thread_mask; fdtab[fd].cache = fd_cache_num; fd_cache[fd_cache_num-1] = fd; end: diff --git a/src/fd.c b/src/fd.c index 148b4d27e..8411bcfb9 100644 --- a/src/fd.c +++ b/src/fd.c @@ -170,6 +170,7 @@ int nbpollers = 0; unsigned int *fd_cache = NULL; // FD events cache int fd_cache_num = 0; // number of events in the cache +unsigned long fd_cache_mask = 0; // Mask of threads with events in the cache THREAD_LOCAL int *fd_updt = NULL; // FD updates list THREAD_LOCAL int fd_nbupdt = 0; // number of updates in the list @@ -236,10 +237,8 @@ void fd_process_cached_events() { int fd, entry, e; - if (!fd_cache_num) - return; - HA_RWLOCK_RDLOCK(FDCACHE_LOCK, &fdcache_lock); + fd_cache_mask &= ~tid_bit; for (entry = 0; entry < fd_cache_num; ) { fd = fd_cache[entry]; @@ -247,6 +246,8 @@ void fd_process_cached_events() activity[tid].fd_skip++; goto next; } + + fd_cache_mask |= tid_bit; if (HA_SPIN_TRYLOCK(FD_LOCK, &fdtab[fd].lock)) { activity[tid].fd_lock++; goto next; From 3d4c732a698022b29186ec31cc36c653a1b11428 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Mon, 15 Jan 2018 12:16:34 +0100 Subject: [PATCH 22/52] BUG/MEDIUM: threads/polling: Use fd_cache_mask instead of fd_cache_num fd_cache_num is the number of FDs in the FD cache. It is a global variable. So it is underoptimized because we may be lead to consider there are waiting FDs for the current thread in the FD cache while in fact all FDs are assigned to the other threads. So, in such cases, the polling loop will be evaluated many more times than necessary. Instead, we now check if the thread id is set in the bitfield fd_cache_mask. [wt: it's not exactly a bug, rather a design limitation of the thread which was not addressed in time for the 1.8 release. It can appear more often than we initially predicted, when more threads are running than the number of assigned CPU cores, or when certain threads spend milliseconds computing crypto keys while other threads spin on epoll_wait(0)=0] This patch should be backported to 1.8. (cherry picked from commit 32467fef98f9a4f14be8864bc44b3551f8e34759) Signed-off-by: Willy Tarreau --- src/haproxy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/haproxy.c b/src/haproxy.c index 952733ee3..45ff0fc25 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2392,7 +2392,7 @@ static void run_poll_loop() /* expire immediately if events are pending */ exp = now_ms; - if (fd_cache_num) + if (fd_cache_mask & tid_bit) activity[tid].wake_cache++; else if (active_tasks_mask & tid_bit) activity[tid].wake_tasks++; From 79ddc8190c1b1e9d6d29d763dca4250ea7257370 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 20 Jan 2018 23:53:50 +0100 Subject: [PATCH 23/52] BUG/MEDIUM: fd: maintain a per-thread update mask Since the fd update tables are per-thread, we need to have a bit per thread to indicate whether an update exists, otherwise this can lead to lost update events every time multiple threads want to update the same FD. In practice *for now*, it only happens at start time when listeners are enabled and ask for polling after facing their first EAGAIN. But since the pollers are still shared, a lost event is still recovered by a neighbor thread. This will not reliably work anymore with per-thread pollers, where it has been observed a few times on startup that a single-threaded listener would not always accept incoming connections upon startup. It's worth noting that during this code review it appeared that the "new" flag in the fdtab isn't used anymore. This fix should be backported to 1.8. (cherry picked from commit ebc78d78a27ac3de7308eeb499c51d638e79ed6b) Signed-off-by: Willy Tarreau --- include/proto/fd.h | 6 +++--- include/types/fd.h | 2 +- src/cli.c | 5 ++--- src/ev_epoll.c | 2 +- src/ev_kqueue.c | 2 +- src/ev_poll.c | 2 +- src/ev_select.c | 2 +- src/fd.c | 2 +- src/stream.c | 4 ++-- 9 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/proto/fd.h b/include/proto/fd.h index 44370e768..d6b591d19 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -99,10 +99,10 @@ void fd_process_cached_events(); */ static inline void updt_fd_polling(const int fd) { - if (fdtab[fd].updated) + if (fdtab[fd].update_mask & tid_bit) /* already scheduled for update */ return; - fdtab[fd].updated = 1; + fdtab[fd].update_mask |= tid_bit; fd_updt[fd_nbupdt++] = fd; } @@ -400,7 +400,7 @@ static inline void fd_insert(int fd, unsigned long thread_mask) HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); fdtab[fd].ev = 0; fdtab[fd].new = 1; - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].linger_risk = 0; fdtab[fd].cloned = 0; fdtab[fd].cache = 0; diff --git a/include/types/fd.h b/include/types/fd.h index 032bab967..54192e478 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -94,13 +94,13 @@ enum fd_states { struct fdtab { __decl_hathreads(HA_SPINLOCK_T lock); unsigned long thread_mask; /* mask of thread IDs authorized to process the task */ + unsigned long update_mask; /* mask of thread IDs having an update for fd */ void (*iocb)(int fd); /* I/O handler */ void *owner; /* the connection or listener associated with this fd, NULL if closed */ unsigned int cache; /* position+1 in the FD cache. 0=not in cache. */ unsigned char state; /* FD state for read and write directions (2*3 bits) */ unsigned char ev; /* event seen in return of poll() : FD_POLL_* */ unsigned char new:1; /* 1 if this fd has just been created */ - unsigned char updated:1; /* 1 if this fd is already in the update list */ unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */ unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */ }; diff --git a/src/cli.c b/src/cli.c index 3e62c311b..d5c615bb1 100644 --- a/src/cli.c +++ b/src/cli.c @@ -794,7 +794,7 @@ static int cli_io_handler_show_fd(struct appctx *appctx) li = fdt.owner; chunk_printf(&trash, - " %5d : st=0x%02x(R:%c%c%c W:%c%c%c) ev=0x%02x(%c%c%c%c%c) [%c%c%c%c] cache=%u owner=%p iocb=%p(%s) tmask=0x%lx", + " %5d : st=0x%02x(R:%c%c%c W:%c%c%c) ev=0x%02x(%c%c%c%c%c) [%c%c%c] cache=%u owner=%p iocb=%p(%s) tmask=0x%lx umask=0x%lx", fd, fdt.state, (fdt.state & FD_EV_POLLED_R) ? 'P' : 'p', @@ -810,7 +810,6 @@ static int cli_io_handler_show_fd(struct appctx *appctx) (fdt.ev & FD_POLL_PRI) ? 'P' : 'p', (fdt.ev & FD_POLL_IN) ? 'I' : 'i', fdt.new ? 'N' : 'n', - fdt.updated ? 'U' : 'u', fdt.linger_risk ? 'L' : 'l', fdt.cloned ? 'C' : 'c', fdt.cache, @@ -820,7 +819,7 @@ static int cli_io_handler_show_fd(struct appctx *appctx) (fdt.iocb == dgram_fd_handler) ? "dgram_fd_handler" : (fdt.iocb == listener_accept) ? "listener_accept" : "unknown", - fdt.thread_mask); + fdt.thread_mask, fdt.update_mask); if (fdt.iocb == conn_fd_handler) { chunk_appendf(&trash, " cflg=0x%08x", conn_flags); diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 679dfee4d..f37455faf 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -74,7 +74,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; eo = fdtab[fd].state; diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index 69d51b6b0..20fa29084 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -53,7 +53,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; eo = fdtab[fd].state; diff --git a/src/ev_poll.c b/src/ev_poll.c index efd56ee19..f24bf69a9 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -79,7 +79,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; eo = fdtab[fd].state; diff --git a/src/ev_select.c b/src/ev_select.c index 52c445473..19b13805c 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -61,7 +61,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) } HA_SPIN_LOCK(FD_LOCK, &fdtab[fd].lock); - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; eo = fdtab[fd].state; diff --git a/src/fd.c b/src/fd.c index 8411bcfb9..112806bbb 100644 --- a/src/fd.c +++ b/src/fd.c @@ -199,7 +199,7 @@ static void fd_dodelete(int fd, int do_close) port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port); fdinfo[fd].port_range = NULL; fdtab[fd].owner = NULL; - fdtab[fd].updated = 0; + fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; fdtab[fd].thread_mask = 0; if (do_close) diff --git a/src/stream.c b/src/stream.c index ebe41be19..92f9c0a64 100644 --- a/src/stream.c +++ b/src/stream.c @@ -2906,7 +2906,7 @@ static int stats_dump_full_strm_to_buffer(struct stream_interface *si, struct st conn->handle.fd, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].state : 0, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].cache : 0, - conn->handle.fd >= 0 ? fdtab[conn->handle.fd].updated : 0, + conn->handle.fd >= 0 ? !!(fdtab[conn->handle.fd].update_mask & tid_bit) : 0, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].thread_mask: 0); } else if ((tmpctx = objt_appctx(strm->si[0].end)) != NULL) { @@ -2939,7 +2939,7 @@ static int stats_dump_full_strm_to_buffer(struct stream_interface *si, struct st conn->handle.fd, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].state : 0, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].cache : 0, - conn->handle.fd >= 0 ? fdtab[conn->handle.fd].updated : 0, + conn->handle.fd >= 0 ? !!(fdtab[conn->handle.fd].update_mask & tid_bit) : 0, conn->handle.fd >= 0 ? fdtab[conn->handle.fd].thread_mask: 0); } else if ((tmpctx = objt_appctx(strm->si[1].end)) != NULL) { From 8560c73764b0ce3249c3098d6a4808fa0659e283 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 17 Jan 2018 18:44:46 +0100 Subject: [PATCH 24/52] MINOR: fd: add a bitmask to indicate that an FD is known by the poller Some pollers like epoll() need to know if the fd is already known or not in order to compute the operation to perform (add, mod, del). For now this is performed based on the difference between the previous FD state and the new state but this will not be usable anymore once threads become responsible for their own polling. Here we come with a different approach : a bitmask is stored with the fd to indicate which pollers already know it, and the pollers will be able to simply perform the add/mod/del operations based on this bit combined with the new state. This patch only adds the bitmask declaration and initialization, it is it not yet used. It will be needed by the next two fixes and will need to be backported to 1.8. (cherry picked from commit c9c8378c2b7aed45a9b4733d81f69338dad7614a) Signed-off-by: Willy Tarreau --- include/proto/fd.h | 3 +++ include/types/fd.h | 1 + src/fd.c | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/proto/fd.h b/include/proto/fd.h index d6b591d19..cc559ac32 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -405,6 +405,9 @@ static inline void fd_insert(int fd, unsigned long thread_mask) fdtab[fd].cloned = 0; fdtab[fd].cache = 0; fdtab[fd].thread_mask = thread_mask; + /* note: do not reset polled_mask here as it indicates which poller + * still knows this FD from a possible previous round. + */ HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock); HA_SPIN_LOCK(FDTAB_LOCK, &fdtab_lock); diff --git a/include/types/fd.h b/include/types/fd.h index 54192e478..9f2c5feea 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -94,6 +94,7 @@ enum fd_states { struct fdtab { __decl_hathreads(HA_SPINLOCK_T lock); unsigned long thread_mask; /* mask of thread IDs authorized to process the task */ + unsigned long polled_mask; /* mask of thread IDs currently polling this fd */ unsigned long update_mask; /* mask of thread IDs having an update for fd */ void (*iocb)(int fd); /* I/O handler */ void *owner; /* the connection or listener associated with this fd, NULL if closed */ diff --git a/src/fd.c b/src/fd.c index 112806bbb..b64130ed0 100644 --- a/src/fd.c +++ b/src/fd.c @@ -202,8 +202,10 @@ static void fd_dodelete(int fd, int do_close) fdtab[fd].update_mask &= ~tid_bit; fdtab[fd].new = 0; fdtab[fd].thread_mask = 0; - if (do_close) + if (do_close) { + fdtab[fd].polled_mask = 0; close(fd); + } HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock); HA_SPIN_LOCK(FDTAB_LOCK, &fdtab_lock); From fadef1c842f7810df41fd6d56bfad50fb078697e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 18 Jan 2018 19:16:02 +0100 Subject: [PATCH 25/52] BUG/MEDIUM: epoll/threads: use one epoll_fd per thread There currently is a problem regarding epoll(). While select() and poll() compute their polling state on the fly upon each call, epoll() keeps a shared state between all threads via the epoll_fd. The problem is that once an fd is registered on *any* thread, all other threads receive events for that FD as well. It is clearly visible when binding a listener to a single thread like in the configuration below where all 4 threads will work, 3 of them simply spinning to skip the event : global nbthread 4 frontend foo bind :1234 process 1/1 The worst case happens when some slow operations are in progress on a busy thread, preventing it from processing its task and causing the other ones to wake up not being able to do anything with this event. Typically computing a large TLS key will delay processing of next events on the same thread while others will still wake up. All this simply shows that the poller must remain thread-specific, with its own events and its own ability to sleep when it doesn't have anyhing to do. This patch does exactly this. For this, it proceeds like this : - have one epoll_fd per thread instead of one per process - initialize these epoll_fd when threads are created. - mark all known FDs as updated so that the next invocation of _do_poll() recomputes their polling status (including a possible removal of undesired polling from the original FD) ; - use each fd's polled_mask to maintain an accurate status of the current polling activity for this FD. - when scanning updates, only focus on events whose new polling status differs from the existing one - during updates, always verify the thread_mask to resist migration - on __fd_clo(), for cloned FDs (typically listeners inherited from the parent during a graceful shutdown), run epoll_ctl(DEL) on all epoll_fd. This is the reason why epoll_fd is stored in a shared array and not in a thread_local storage. Note: maybe this can be moved to an update instead. Interestingly, this shows that we don't need the FD's old state anymore and that we only use it to convert it to the new state based on stable information. It appears clearly that the FD code can be further improved by computing the final state directly when manipulating it. With this change, the config above goes from 22000 cps at 380% CPU to 43000 cps at 100% CPU : not only the 3 unused threads are not activated, but they do not disturb the activity anymore. The output of "show activity" before and after the patch on a 4-thread config where a first listener on thread 2 forwards over SSL to threads 3 & 4 shows this a much smaller amount of undesired events (thread 1 doesn't wake up anymore, poll_skip remains zero, fd_skip stays low) : // before: 400% CPU, 7700 cps, 13 seconds loops: 11380717 65879 5733468 5728129 wake_cache: 0 63986 317547 314174 wake_tasks: 0 0 0 0 wake_applets: 0 0 0 0 wake_signal: 0 0 0 0 poll_exp: 0 63986 317547 314174 poll_drop: 1 0 49981 48893 poll_dead: 65514 0 31334 31934 poll_skip: 46293690 34071 22867786 22858208 fd_skip: 66068135 174157 33732685 33825727 fd_lock: 0 2 2809 2905 fd_del: 0 494361 80890 79464 conn_dead: 0 0 0 0 stream: 0 407747 50526 49474 empty_rq: 11380718 1914 5683023 5678715 long_rq: 0 0 0 0 // after: 200% cpu, 9450 cps, 11 seconds loops: 17 66147 1001631 450968 wake_cache: 0 66119 865139 321227 wake_tasks: 0 0 0 0 wake_applets: 0 0 0 0 wake_signal: 0 0 0 0 poll_exp: 0 66119 865139 321227 poll_drop: 6 5 38279 60768 poll_dead: 0 0 0 0 poll_skip: 0 0 0 0 fd_skip: 54 172661 4411407 2008198 fd_lock: 0 0 10890 5394 fd_del: 0 492829 58965 105091 conn_dead: 0 0 0 0 stream: 0 406223 38663 61338 empty_rq: 18 40 962999 390549 long_rq: 0 0 0 0 This patch presents a few risks but fixes a real problem with threads, and as such it needs be backported to 1.8. It depends on previous patch ("MINOR: fd: add a bitmask to indicate that an FD is known by the poller"). Special thanks go to Samuel Reed for providing a large amount of useful debugging information and for testing fixes. (cherry picked from commit d9e7e36c6e5c0f9988a758d81fbe0a8fa8413922) Signed-off-by: Willy Tarreau --- src/ev_epoll.c | 108 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 35 deletions(-) diff --git a/src/ev_epoll.c b/src/ev_epoll.c index f37455faf..635b8a5fe 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -29,7 +29,7 @@ /* private data */ static THREAD_LOCAL struct epoll_event *epoll_events = NULL; -static int epoll_fd; +static int epoll_fd[MAX_THREADS]; // per-thread epoll_fd /* This structure may be used for any purpose. Warning! do not use it in * recursive functions ! @@ -49,8 +49,14 @@ static THREAD_LOCAL struct epoll_event ev; */ REGPRM1 static void __fd_clo(int fd) { - if (unlikely(fdtab[fd].cloned)) - epoll_ctl(epoll_fd, EPOLL_CTL_DEL, fd, &ev); + if (unlikely(fdtab[fd].cloned)) { + unsigned long m = fdtab[fd].thread_mask; + int i; + + for (i = global.nbthread - 1; i >= 0; i--) + if (m & (1UL << i)) + epoll_ctl(epoll_fd[i], EPOLL_CTL_DEL, fd, &ev); + } } /* @@ -82,34 +88,36 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) fdtab[fd].state = en; HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock); - if ((eo ^ en) & FD_EV_POLLED_RW) { - /* poll status changed */ - - if ((en & FD_EV_POLLED_RW) == 0) { + if (fdtab[fd].polled_mask & tid_bit) { + if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) { /* fd removed from poll list */ opcode = EPOLL_CTL_DEL; - } - else if ((eo & FD_EV_POLLED_RW) == 0) { - /* new fd in the poll list */ - opcode = EPOLL_CTL_ADD; + HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit); } else { /* fd status changed */ opcode = EPOLL_CTL_MOD; } - - /* construct the epoll events based on new state */ - ev.events = 0; - if (en & FD_EV_POLLED_R) - ev.events |= EPOLLIN | EPOLLRDHUP; - - if (en & FD_EV_POLLED_W) - ev.events |= EPOLLOUT; - - ev.data.fd = fd; - - epoll_ctl(epoll_fd, opcode, fd, &ev); } + else if ((fdtab[fd].thread_mask & tid_bit) && (en & FD_EV_POLLED_RW)) { + /* new fd in the poll list */ + opcode = EPOLL_CTL_ADD; + HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit); + } + else { + continue; + } + + /* construct the epoll events based on new state */ + ev.events = 0; + if (en & FD_EV_POLLED_R) + ev.events |= EPOLLIN | EPOLLRDHUP; + + if (en & FD_EV_POLLED_W) + ev.events |= EPOLLOUT; + + ev.data.fd = fd; + epoll_ctl(epoll_fd[tid], opcode, fd, &ev); } fd_nbupdt = 0; @@ -129,7 +137,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) /* now let's wait for polled events */ gettimeofday(&before_poll, NULL); - status = epoll_wait(epoll_fd, epoll_events, global.tune.maxpollevents, wait_time); + status = epoll_wait(epoll_fd[tid], epoll_events, global.tune.maxpollevents, wait_time); tv_update_date(wait_time, status); measure_idle(); @@ -146,7 +154,10 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) } if (!(fdtab[fd].thread_mask & tid_bit)) { + /* FD has been migrated */ activity[tid].poll_skip++; + epoll_ctl(epoll_fd[tid], EPOLL_CTL_DEL, fd, &ev); + HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit); continue; } @@ -178,14 +189,38 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) static int init_epoll_per_thread() { + int fd; + epoll_events = calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents); if (epoll_events == NULL) - return 0; + goto fail_alloc; + + if (tid) { + epoll_fd[tid] = epoll_create(global.maxsock + 1); + if (epoll_fd[tid] < 0) + goto fail_fd; + } + + /* we may have to unregister some events initially registered on the + * original fd when it was alone, and/or to register events on the new + * fd for this thread. Let's just mark them as updated, the poller will + * do the rest. + */ + for (fd = 0; fd < maxfd; fd++) + updt_fd_polling(fd); + return 1; + fail_fd: + free(epoll_events); + fail_alloc: + return 0; } static void deinit_epoll_per_thread() { + if (tid) + close(epoll_fd[tid]); + free(epoll_events); epoll_events = NULL; } @@ -199,8 +234,8 @@ REGPRM1 static int _do_init(struct poller *p) { p->private = NULL; - epoll_fd = epoll_create(global.maxsock + 1); - if (epoll_fd < 0) + epoll_fd[tid] = epoll_create(global.maxsock + 1); + if (epoll_fd[tid] < 0) goto fail_fd; hap_register_per_thread_init(init_epoll_per_thread); @@ -219,9 +254,9 @@ REGPRM1 static int _do_init(struct poller *p) */ REGPRM1 static void _do_term(struct poller *p) { - if (epoll_fd >= 0) { - close(epoll_fd); - epoll_fd = -1; + if (epoll_fd[tid] >= 0) { + close(epoll_fd[tid]); + epoll_fd[tid] = -1; } p->private = NULL; @@ -251,10 +286,10 @@ REGPRM1 static int _do_test(struct poller *p) */ REGPRM1 static int _do_fork(struct poller *p) { - if (epoll_fd >= 0) - close(epoll_fd); - epoll_fd = epoll_create(global.maxsock + 1); - if (epoll_fd < 0) + if (epoll_fd[tid] >= 0) + close(epoll_fd[tid]); + epoll_fd[tid] = epoll_create(global.maxsock + 1); + if (epoll_fd[tid] < 0) return 0; return 1; } @@ -268,11 +303,14 @@ __attribute__((constructor)) static void _do_register(void) { struct poller *p; + int i; if (nbpollers >= MAX_POLLERS) return; - epoll_fd = -1; + for (i = 0; i < MAX_THREADS; i++) + epoll_fd[i] = -1; + p = &pollers[nbpollers++]; p->name = "epoll"; From f839593dd26ec210ba66d74b2a4c2040dd1ed806 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 19 Jan 2018 08:56:14 +0100 Subject: [PATCH 26/52] BUG/MEDIUM: kqueue/threads: use one kqueue_fd per thread This is the same principle as the previous patch (BUG/MEDIUM: epoll/threads: use one epoll_fd per thread) except that this time it's for kqueue. We don't want all threads to wake up because of activity on a single other thread that the other ones are not interested in. Just like with previous patch, this one shows that the polling state doesn't need to be changed here and that some simplifications are now possible. This patch only implements the minimum required for a stable backport. This should be backported to 1.8. (cherry picked from commit 7a2364d4741eed7d435019690b0f0d12878939b0) Signed-off-by: Willy Tarreau --- src/ev_kqueue.c | 93 ++++++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index 20fa29084..532d49dff 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -29,7 +29,7 @@ /* private data */ -static int kqueue_fd; +static int kqueue_fd[MAX_THREADS]; // per-thread kqueue_fd static THREAD_LOCAL struct kevent *kev = NULL; /* @@ -61,35 +61,34 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) fdtab[fd].state = en; HA_SPIN_UNLOCK(FD_LOCK, &fdtab[fd].lock); - if ((eo ^ en) & FD_EV_POLLED_RW) { - /* poll status changed */ - if ((eo ^ en) & FD_EV_POLLED_R) { - /* read poll status changed */ - if (en & FD_EV_POLLED_R) { - EV_SET(&kev[changes], fd, EVFILT_READ, EV_ADD, 0, 0, NULL); - changes++; - } - else { - EV_SET(&kev[changes], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); - changes++; - } + if (!(fdtab[fd].thread_mask & tid_bit) || !(en & FD_EV_POLLED_RW)) { + if (!(fdtab[fd].polled_mask & tid_bit)) { + /* fd was not watched, it's still not */ + continue; } + /* fd totally removed from poll list */ + EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); + HA_ATOMIC_AND(&fdtab[fd].polled_mask, ~tid_bit); + } + else { + /* OK fd has to be monitored, it was either added or changed */ - if ((eo ^ en) & FD_EV_POLLED_W) { - /* write poll status changed */ - if (en & FD_EV_POLLED_W) { - EV_SET(&kev[changes], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); - changes++; - } - else { - EV_SET(&kev[changes], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); - changes++; - } - } + if (en & FD_EV_POLLED_R) + EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL); + else if (fdtab[fd].polled_mask & tid_bit) + EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL); + + if (en & FD_EV_POLLED_W) + EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL); + else if (fdtab[fd].polled_mask & tid_bit) + EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL); + + HA_ATOMIC_OR(&fdtab[fd].polled_mask, tid_bit); } } if (changes) - kevent(kqueue_fd, kev, changes, NULL, 0, NULL); + kevent(kqueue_fd[tid], kev, changes, NULL, 0, NULL); fd_nbupdt = 0; delta_ms = 0; @@ -113,7 +112,7 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) fd = MIN(maxfd, global.tune.maxpollevents); gettimeofday(&before_poll, NULL); - status = kevent(kqueue_fd, // int kq + status = kevent(kqueue_fd[tid], // int kq NULL, // const struct kevent *changelist 0, // int nchanges kev, // struct kevent *eventlist @@ -155,11 +154,32 @@ REGPRM2 static void _do_poll(struct poller *p, int exp) static int init_kqueue_per_thread() { + int fd; + /* we can have up to two events per fd (*/ kev = calloc(1, sizeof(struct kevent) * 2 * global.maxsock); if (kev == NULL) - return 0; + goto fail_alloc; + + if (tid) { + kqueue_fd[tid] = kqueue(); + if (kqueue_fd[tid] < 0) + goto fail_fd; + } + + /* we may have to unregister some events initially registered on the + * original fd when it was alone, and/or to register events on the new + * fd for this thread. Let's just mark them as updated, the poller will + * do the rest. + */ + for (fd = 0; fd < maxfd; fd++) + updt_fd_polling(fd); + return 1; + fail_fd: + free(kev); + fail_alloc: + return 0; } static void deinit_kqueue_per_thread() @@ -177,8 +197,8 @@ REGPRM1 static int _do_init(struct poller *p) { p->private = NULL; - kqueue_fd = kqueue(); - if (kqueue_fd < 0) + kqueue_fd[tid] = kqueue(); + if (kqueue_fd[tid] < 0) goto fail_fd; hap_register_per_thread_init(init_kqueue_per_thread); @@ -196,9 +216,9 @@ REGPRM1 static int _do_init(struct poller *p) */ REGPRM1 static void _do_term(struct poller *p) { - if (kqueue_fd >= 0) { - close(kqueue_fd); - kqueue_fd = -1; + if (kqueue_fd[tid] >= 0) { + close(kqueue_fd[tid]); + kqueue_fd[tid] = -1; } p->private = NULL; @@ -227,8 +247,8 @@ REGPRM1 static int _do_test(struct poller *p) */ REGPRM1 static int _do_fork(struct poller *p) { - kqueue_fd = kqueue(); - if (kqueue_fd < 0) + kqueue_fd[tid] = kqueue(); + if (kqueue_fd[tid] < 0) return 0; return 1; } @@ -242,11 +262,14 @@ __attribute__((constructor)) static void _do_register(void) { struct poller *p; + int i; if (nbpollers >= MAX_POLLERS) return; - kqueue_fd = -1; + for (i = 0; i < MAX_THREADS; i++) + kqueue_fd[i] = -1; + p = &pollers[nbpollers++]; p->name = "kqueue"; From a91f5578f2d8690f8ef0d956a65a248e845a4a86 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 23 Jan 2018 19:01:49 +0100 Subject: [PATCH 27/52] BUG/MEDIUM: threads/mworker: fix a race on startup Marc Fournier reported an interesting case when using threads with the master-worker mode : sometimes, a listener would have its FD closed during startup. Sometimes it could even be health checks seeing this. What happens is that after the threads are created, and the pollers enabled on each threads, the master-worker pipe is registered, and at the same time a close() is performed on the write side of this pipe since the children must not use it. But since this is replicated in every thread, what happens is that the first thread closes the pipe, thus releases the FD, and the next thread starting a listener in parallel gets this FD reassigned. Then another thread closes the FD again, which this time corresponds to the listener. It can also happen with the health check sockets if they're started early enough. This patch splits the mworker_pipe_register() function in two, so that the close() of the write side of the FD is performed very early after the fork() and long before threads are created (we don't need to delay it anyway). Only the pipe registration is done in the threaded code since it is important that the pollers are properly allocated for this. The mworker_pipe_register() function now takes care of registering the pipe only once, and this is guaranteed by a new surrounding lock. The call to protocol_enable_all() looks fragile in theory since it scans the list of proxies and their listeners, though in practice all threads scan the same list and take the same locks for each listener so it's not possible that any of them escapes the process and finishes before all listeners are started. And the operation is idempotent. This fix must be backported to 1.8. Thanks to Marc for providing very detailed traces clearly showing the problem. (cherry picked from commit 1605c7ae6154d8c2cfcf3b325872b1a7266c5bc2) Signed-off-by: Willy Tarreau --- include/common/hathreads.h | 1 + src/haproxy.c | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index f81953686..cfcb48e9b 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -237,6 +237,7 @@ enum lock_label { PID_LIST_LOCK, EMAIL_ALERTS_LOCK, PIPES_LOCK, + START_LOCK, LOCK_LABELS }; struct lock_stat { diff --git a/src/haproxy.c b/src/haproxy.c index 45ff0fc25..89044c6a5 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2339,9 +2339,12 @@ void mworker_pipe_handler(int fd) return; } -void mworker_pipe_register(int pipefd[2]) +/* should only be called once per process */ +void mworker_pipe_register() { - close(mworker_pipe[1]); /* close the write end of the master pipe in the children */ + if (fdtab[mworker_pipe[0]].owner) + /* already initialized */ + return; fcntl(mworker_pipe[0], F_SETFL, O_NONBLOCK); fdtab[mworker_pipe[0]].owner = mworker_pipe; @@ -2419,6 +2422,7 @@ static void *run_thread_poll_loop(void *data) { struct per_thread_init_fct *ptif; struct per_thread_deinit_fct *ptdf; + static __maybe_unused HA_SPINLOCK_T start_lock; tid = *((unsigned int *)data); tid_bit = (1UL << tid); @@ -2431,8 +2435,11 @@ static void *run_thread_poll_loop(void *data) } } - if (global.mode & MODE_MWORKER) - mworker_pipe_register(mworker_pipe); + if (global.mode & MODE_MWORKER) { + HA_SPIN_LOCK(START_LOCK, &start_lock); + mworker_pipe_register(); + HA_SPIN_UNLOCK(START_LOCK, &start_lock); + } protocol_enable_all(); THREAD_SYNC_ENABLE(); @@ -2861,6 +2868,10 @@ int main(int argc, char **argv) /* child must never use the atexit function */ atexit_flag = 0; + /* close the write end of the master pipe in the children */ + if (global.mode & MODE_MWORKER) + close(mworker_pipe[1]); + if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) { devnullfd = open("/dev/null", O_RDWR, 0); if (devnullfd < 0) { From 945f4cf086b9029d892712c4bd382de6a02fb1e1 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 23 Jan 2018 19:20:19 +0100 Subject: [PATCH 28/52] BUG/MINOR: mworker: only write to pidfile if it exists A missing test causes a write(-1, $PID) to appear in strace output when in master-worker mode. This is totally harmless though. This fix must be backported to 1.8. (cherry picked from commit 46ec48bc1ad81cdf10aacfd5d7b2911c0ef2b0df) Signed-off-by: Willy Tarreau --- src/haproxy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/haproxy.c b/src/haproxy.c index 89044c6a5..8e0e30d8a 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2786,7 +2786,8 @@ int main(int argc, char **argv) if (global.mode & MODE_MWORKER) { char pidstr[100]; snprintf(pidstr, sizeof(pidstr), "%d\n", getpid()); - shut_your_big_mouth_gcc(write(pidfd, pidstr, strlen(pidstr))); + if (pidfd >= 0) + shut_your_big_mouth_gcc(write(pidfd, pidstr, strlen(pidstr))); } /* the father launches the required number of processes */ From bbedf00dbd0e4dfc002fb8007b1ca769006a91bb Mon Sep 17 00:00:00 2001 From: Olivier Houchard Date: Wed, 24 Jan 2018 15:41:04 +0100 Subject: [PATCH 29/52] MINOR: threads: Fix build when we're not compiling with threads. Only declare the start_lock if threads are compiled in, otherwise HA_SPINLOCK_T won't be defined. This should be backported to 1.8 when/if 1605c7ae6154d8c2cfcf3b325872b1a7266c5bc2 is backported. (cherry picked from commit 0048dd04c9a6e9b9c7def8e7060793290e0c36ff) Signed-off-by: Willy Tarreau --- src/haproxy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/haproxy.c b/src/haproxy.c index 8e0e30d8a..8a4e8a563 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2422,7 +2422,9 @@ static void *run_thread_poll_loop(void *data) { struct per_thread_init_fct *ptif; struct per_thread_deinit_fct *ptdf; - static __maybe_unused HA_SPINLOCK_T start_lock; +#ifdef USE_THREAD + static HA_SPINLOCK_T start_lock; +#endif tid = *((unsigned int *)data); tid_bit = (1UL << tid); From 2c2cb94b059954208d3645768cf5ed723668934a Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 25 Jan 2018 07:28:37 +0100 Subject: [PATCH 30/52] BUG/MINOR: threads: always set an owner to the thread_sync pipe The owner of the fd used by the synchronization pipe was set to NULL, making it ignored by maxfd computation. The risk would be that some synchronization events get delayed between threads when using poll() or select(). However this is only theorical since the pipe is created before listeners are bound so normally its FD should be lower and this should normally not happen. The only possible situation would be if all listeners are bound to inherited FDs which are lower than the pipe's. This patch must be backported to 1.8. (cherry picked from commit c20d73733871ee820fe87880e076a00cbad1b7d6) Signed-off-by: Willy Tarreau --- src/hathreads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hathreads.c b/src/hathreads.c index 50f3c7701..fea4ffeb0 100644 --- a/src/hathreads.c +++ b/src/hathreads.c @@ -49,7 +49,7 @@ int thread_sync_init(unsigned long mask) rfd = threads_sync_pipe[0]; fcntl(rfd, F_SETFL, O_NONBLOCK); - fdtab[rfd].owner = NULL; + fdtab[rfd].owner = thread_sync_io_handler; fdtab[rfd].iocb = thread_sync_io_handler; fd_insert(rfd, MAX_THREADS_MASK); From 757dd8c7407c94dd95272ba0bc92c18d0f106867 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Wed, 24 Jan 2018 21:49:41 +0100 Subject: [PATCH 31/52] BUG/MEDIUM: threads/server: Fix deadlock in srv_set_stopping/srv_set_admin_flag Because of a typo (HA_SPIN_LOCK instead of HA_SPIN_UNLOCK), there is a deadlock in srv_set_stopping and srv_set_admin_flag when there is at least one trackers. This patch must be backported in 1.8. (cherry picked from commit 8d01fd6b3caf7fd97a21aa24bb946b12484ce1a3) Signed-off-by: Willy Tarreau --- src/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 3901e7d8b..07a6603a3 100644 --- a/src/server.c +++ b/src/server.c @@ -976,7 +976,7 @@ void srv_set_stopping(struct server *s, const char *reason, struct check *check) for (srv = s->trackers; srv; srv = srv->tracknext) { HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); srv_set_stopping(srv, NULL, NULL); - HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); + HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); } } @@ -1019,7 +1019,7 @@ void srv_set_admin_flag(struct server *s, enum srv_admin mode, const char *cause for (srv = s->trackers; srv; srv = srv->tracknext) { HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); srv_set_admin_flag(srv, mode, cause); - HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); + HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); } } From 616078521470b5c49325fdb9af8c345bedb770b2 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 25 Jan 2018 11:36:35 +0100 Subject: [PATCH 32/52] BUG/MEDIUM: checks: Don't try to release undefined conn_stream when a check is freed When a healt-check is released, the attached conn_stream may be undefined. For instance, this happens when 'no-check' option is used on a server line. So we must check it is defined before trying to release it. This patch must be backported in 1.8. (cherry picked from commit 23d86d157edd6da8d3fe40d7883a3d2beeed3189) Signed-off-by: Willy Tarreau --- src/checks.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/checks.c b/src/checks.c index 56c9d609d..0d4893e69 100644 --- a/src/checks.c +++ b/src/checks.c @@ -3098,10 +3098,12 @@ void free_check(struct check *check) check->bi = NULL; free(check->bo); check->bo = NULL; - free(check->cs->conn); - check->cs->conn = NULL; - cs_free(check->cs); - check->cs = NULL; + if (check->cs) { + free(check->cs->conn); + check->cs->conn = NULL; + cs_free(check->cs); + check->cs = NULL; + } } void email_alert_free(struct email_alert *alert) From b763e589b5f2a809b11e87a5598cc1008158773e Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 25 Jan 2018 16:32:18 +0100 Subject: [PATCH 33/52] BUG/MINOR: kqueue/threads: Don't forget to close kqueue_fd[tid] on each thread in deinit_kqueue_per_thread, kqueue_fd[tid] must be closed, except for the main thread (the first one, tid==0). This patch must be backported in 1.8 with commit 7a2364d4. (cherry picked from commit 13b007d583924d19d692268c2f51a2fc2b46fd0c) Signed-off-by: Willy Tarreau --- src/ev_kqueue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index 532d49dff..d58bc4ab4 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -184,6 +184,9 @@ static int init_kqueue_per_thread() static void deinit_kqueue_per_thread() { + if (tid) + close(kqueue_fd[tid]); + free(kev); kev = NULL; } From 0c44babeace11d34fb6c134df6b4eb80c1c2fd50 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 25 Jan 2018 16:10:16 +0100 Subject: [PATCH 34/52] MINOR: threads: Use __decl_hathreads instead of #ifdef/#endif A #ifdef/#endif on USE_THREAD was added in the commit 0048dd04 ("MINOR: threads: Fix build when we're not compiling with threads.") to conditionally define the start_lock variable, because HA_SPINLOCK_T is only defined when HAProxy is compiled with threads. If fact, to do that, we should use the macro __decl_hathreads instead. If commit 0048dd04 is backported in 1.8, this one can also be backported. (cherry picked from commit da18b9db7b26080b899985191c6344d79f5497b5) Signed-off-by: Willy Tarreau --- src/haproxy.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/haproxy.c b/src/haproxy.c index 8a4e8a563..34e8f7549 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -2422,9 +2422,7 @@ static void *run_thread_poll_loop(void *data) { struct per_thread_init_fct *ptif; struct per_thread_deinit_fct *ptdf; -#ifdef USE_THREAD - static HA_SPINLOCK_T start_lock; -#endif + __decl_hathreads(static HA_SPINLOCK_T start_lock); tid = *((unsigned int *)data); tid_bit = (1UL << tid); From f17eea8675d31ae31ef3154d95f5a1a7f98d326a Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 25 Jan 2018 16:18:09 +0100 Subject: [PATCH 35/52] BUILD: epoll/threads: Add test on MAX_THREADS to avoid warnings when complied without threads When HAProxy is complied without threads, gcc throws following warnings: src/ev_epoll.c:222:3: warning: array subscript is outside array bounds [-Warray-bounds] ... src/ev_epoll.c:199:11: warning: array subscript is outside array bounds [-Warray-bounds] ... Of course, this is not a bug. In such case, tid is always equal to 0. But to avoid the noise, a check on MAX_THREADS in "if (tid)" lines makes gcc happy. This patch should be backported in 1.8 with the commit d9e7e36c ("BUG/MEDIUM: epoll/threads: use one epoll_fd per thread"). (cherry picked from commit 3e805ed08ec1c16c29a59b6d08dc8d96f0b69e15) Signed-off-by: Willy Tarreau --- src/ev_epoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ev_epoll.c b/src/ev_epoll.c index 635b8a5fe..e5c0001c9 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -195,7 +195,7 @@ static int init_epoll_per_thread() if (epoll_events == NULL) goto fail_alloc; - if (tid) { + if (MAX_THREADS > 1 && tid) { epoll_fd[tid] = epoll_create(global.maxsock + 1); if (epoll_fd[tid] < 0) goto fail_fd; @@ -218,7 +218,7 @@ static int init_epoll_per_thread() static void deinit_epoll_per_thread() { - if (tid) + if (MAX_THREADS > 1 && tid) close(epoll_fd[tid]); free(epoll_events); From 2dd90ea170614be1f1c6214c8236791ade96f737 Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 25 Jan 2018 16:40:35 +0100 Subject: [PATCH 36/52] BUILD: kqueue/threads: Add test on MAX_THREADS to avoid warnings when complied without threads This is the same patch than the previous one ("BUILD: epoll/threads: Add test on MAX_THREADS to avoid warnings when complied without threads "). It should be backported in 1.8 with the commit 7a2364d4 ("BUG/MEDIUM: kqueue/threads: use one kqueue_fd per thread"). (cherry picked from commit 727c89b3dfd158760a68656be0615f1a8f192933) Signed-off-by: Willy Tarreau --- src/ev_kqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c index d58bc4ab4..86731af88 100644 --- a/src/ev_kqueue.c +++ b/src/ev_kqueue.c @@ -161,7 +161,7 @@ static int init_kqueue_per_thread() if (kev == NULL) goto fail_alloc; - if (tid) { + if (MAX_THREADS > 1 && tid) { kqueue_fd[tid] = kqueue(); if (kqueue_fd[tid] < 0) goto fail_fd; @@ -184,7 +184,7 @@ static int init_kqueue_per_thread() static void deinit_kqueue_per_thread() { - if (tid) + if (MAX_THREADS > 1 && tid) close(kqueue_fd[tid]); free(kev); From 34e807e77d57b0a087b196b9d397c181285bb255 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 25 Jan 2018 16:24:44 +0100 Subject: [PATCH 37/52] CLEANUP: sample: Fix comment encoding of sample.c The file contained an 'e' with an gravis accent and thus was not US-ASCII, but ISO-8859-1. Also correct the spelling in the incorrect comment. The incorrect character was introduced in commit: 4d9a1d1a5c4720a169654ee47f9a4364261ffab4 v1.6-dev1 is the first tag containing this comment, the fix should be backported to haproxy 1.6 and newer. (cherry picked from commit c555ee0c45098278ddad695cbbd16a815bcb50da) Signed-off-by: Willy Tarreau --- src/sample.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sample.c b/src/sample.c index f9c1ff4e4..d51cc2da2 100644 --- a/src/sample.c +++ b/src/sample.c @@ -389,12 +389,12 @@ struct sample_fetch *find_sample_fetch(const char *kw, int len) return NULL; } -/* This fucntion browse the list of available saple fetch. is +/* This function browses the list of available sample fetches. is * the last used sample fetch. If it is the first call, it must set to NULL. - * is the index of the next samplečfetch entry. It is used as private - * value. It is useles to initiate it. + * is the index of the next sample fetch entry. It is used as private + * value. It is useless to initiate it. * - * It returns always the newt fetch_sample entry, and NULL when the end of + * It returns always the new fetch_sample entry, and NULL when the end of * the list is reached. */ struct sample_fetch *sample_fetch_getnext(struct sample_fetch *current, int *idx) From ffde75e94211d25b30b15687d10783d0b9000e9d Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 25 Jan 2018 16:24:45 +0100 Subject: [PATCH 38/52] CLEANUP: sample: Fix outdated comment about sample casts functions The cast functions modify their output type as of commit: b805f71d1bb1487f01f78a6ffab26d44919e9944 v1.5-dev20 is the first tag containing this comment, the fix should be backported to haproxy 1.5 and newer. (cherry picked from commit ec6b0a2d189b84d743baccbcc14a4a4b97390549) Signed-off-by: Willy Tarreau --- src/sample.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/sample.c b/src/sample.c index d51cc2da2..3ceba523b 100644 --- a/src/sample.c +++ b/src/sample.c @@ -496,8 +496,6 @@ struct sample_conv *find_sample_conv(const char *kw, int len) /******************************************************************/ /* Sample casts functions */ -/* Note: these functions do *NOT* set the output type on the */ -/* sample, the caller is responsible for doing this on return. */ /******************************************************************/ static int c_ip2int(struct sample *smp) From 23528d0f23485c9914311f9f10dd0cd0fe4d734f Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 25 Jan 2018 16:24:46 +0100 Subject: [PATCH 39/52] BUG/MINOR: sample: Fix output type of c_ipv62ip c_ipv62ip failed to set the output type of the cast to SMP_T_IPV4 even for a successful conversion. This bug exists as of commit cc4d1716a2e72516c2505a6459a9ddbbfb186da2 which is the first commit adding this function. v1.6-dev4 is the first tag containing this commit, the fix should be backported to haproxy 1.6 and newer. (cherry picked from commit bf5ce02effdc2d471fdb421010757320136cbb45) Signed-off-by: Willy Tarreau --- src/sample.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sample.c b/src/sample.c index 3ceba523b..5380d02e6 100644 --- a/src/sample.c +++ b/src/sample.c @@ -531,7 +531,7 @@ static int c_ipv62ip(struct sample *smp) { if (!v6tov4(&smp->data.u.ipv4, &smp->data.u.ipv6)) return 0; - smp->data.type = SMP_T_IPV6; + smp->data.type = SMP_T_IPV4; return 1; } From 283193a5dd4bbdfc6880c203f34ddbdfe3f5e8d9 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Thu, 25 Jan 2018 16:24:47 +0100 Subject: [PATCH 40/52] CLEANUP: Fix typo in ARGT_MSK6 comment The incorrect comment was introduced in commit: 2ac5718dbd4ec722ece228e9f613d2be74eee9da v1.5-dev9 is the first tag containing this comment, the fix should be backported to haproxy 1.5 and newer. (cherry picked from commit 92bb03420935c7fd844ddccd78acb7b1da2fffd4) Signed-off-by: Willy Tarreau --- include/types/arg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/types/arg.h b/include/types/arg.h index 651164b2d..c3b09deff 100644 --- a/include/types/arg.h +++ b/include/types/arg.h @@ -49,7 +49,7 @@ enum { ARGT_IPV4, /* an IPv4 address */ ARGT_MSK4, /* an IPv4 address mask (integer or dotted), stored as ARGT_IPV4 */ ARGT_IPV6, /* an IPv6 address */ - ARGT_MSK6, /* an IPv6 address mask (integer or dotted), stored as ARGT_IPV4 */ + ARGT_MSK6, /* an IPv6 address mask (integer or dotted), stored as ARGT_IPV6 */ ARGT_TIME, /* a delay in ms by default, stored as ARGT_UINT */ ARGT_SIZE, /* a size in bytes by default, stored as ARGT_UINT */ ARGT_FE, /* a pointer to a frontend only */ From e0a80ca5b24fc3b9abf8e3a155d8dfdd80996f1c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 29 Jan 2018 15:17:05 +0100 Subject: [PATCH 41/52] BUG/MINOR: cli: use global.maxsock and not maxfd to list all FDs The "show fd" command on the CLI doesn't list the last FD in use since it doesn't include maxfd. We don't need to use maxfd here anyway as global.maxsock will do the job pretty well and removes this dependency. This patch may be backported to 1.8. (cherry picked from commit ccea35c9800756f867aff8ec2752fa75480ac345) Signed-off-by: Willy Tarreau --- src/cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.c b/src/cli.c index d5c615bb1..cff59f28a 100644 --- a/src/cli.c +++ b/src/cli.c @@ -772,7 +772,7 @@ static int cli_io_handler_show_fd(struct appctx *appctx) /* we have two inner loops here, one for the proxy, the other one for * the buffer. */ - while (fd < maxfd) { + while (fd < global.maxsock) { struct fdtab fdt; struct listener *li = NULL; struct server *sv = NULL; From 9e53b1986a3b6fc4a95254c828eb3aa551a263ce Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Tue, 30 Jan 2018 11:04:29 +0100 Subject: [PATCH 42/52] BUG/MINOR: threads: Update labels array because of changes in lock_label enum Recent changes to the enum were not synchronized with the lock debugging code. Now we use a switch/case instead of an array so that the compiler throws a warning if there is any inconsistency. To be backported to 1.8 (at least to add the START entry). (cherry picked from commit f51bac2ba83a27a75ef6bab58bfde6489b999c49) Signed-off-by: Willy Tarreau --- include/common/hathreads.h | 54 +++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/include/common/hathreads.h b/include/common/hathreads.h index cfcb48e9b..56abb471b 100644 --- a/include/common/hathreads.h +++ b/include/common/hathreads.h @@ -201,6 +201,7 @@ int thread_need_sync(void); #if defined(DEBUG_THREAD) || defined(DEBUG_FULL) +/* WARNING!!! if you update this enum, please also keep lock_label() up to date below */ enum lock_label { THREAD_SYNC_LOCK = 0, FDTAB_LOCK, @@ -318,16 +319,51 @@ struct ha_rwlock { } info; }; +static inline const char *lock_label(enum lock_label label) +{ + switch (label) { + case THREAD_SYNC_LOCK: return "THREAD_SYNC"; + case FDCACHE_LOCK: return "FDCACHE"; + case FD_LOCK: return "FD"; + case TASK_RQ_LOCK: return "TASK_RQ"; + case TASK_WQ_LOCK: return "TASK_WQ"; + case POOL_LOCK: return "POOL"; + case LISTENER_LOCK: return "LISTENER"; + case LISTENER_QUEUE_LOCK: return "LISTENER_QUEUE"; + case PROXY_LOCK: return "PROXY"; + case SERVER_LOCK: return "SERVER"; + case UPDATED_SERVERS_LOCK: return "UPDATED_SERVERS"; + case LBPRM_LOCK: return "LBPRM"; + case SIGNALS_LOCK: return "SIGNALS"; + case STK_TABLE_LOCK: return "STK_TABLE"; + case STK_SESS_LOCK: return "STK_SESS"; + case APPLETS_LOCK: return "APPLETS"; + case PEER_LOCK: return "PEER"; + case BUF_WQ_LOCK: return "BUF_WQ"; + case STRMS_LOCK: return "STRMS"; + case SSL_LOCK: return "SSL"; + case SSL_GEN_CERTS_LOCK: return "SSL_GEN_CERTS"; + case PATREF_LOCK: return "PATREF"; + case PATEXP_LOCK: return "PATEXP"; + case PATLRU_LOCK: return "PATLRU"; + case VARS_LOCK: return "VARS"; + case COMP_POOL_LOCK: return "COMP_POOL"; + case LUA_LOCK: return "LUA"; + case NOTIF_LOCK: return "NOTIF"; + case SPOE_APPLET_LOCK: return "SPOE_APPLET"; + case DNS_LOCK: return "DNS"; + case PID_LIST_LOCK: return "PID_LIST"; + case EMAIL_ALERTS_LOCK: return "EMAIL_ALERTS"; + case PIPES_LOCK: return "PIPES"; + case START_LOCK: return "START"; + case LOCK_LABELS: break; /* keep compiler happy */ + }; + /* only way to come here is consecutive to an internal bug */ + abort(); +} + static inline void show_lock_stats() { - const char *labels[LOCK_LABELS] = {"THREAD_SYNC", "FDTAB", "FDCACHE", "FD", "POLL", - "TASK_RQ", "TASK_WQ", "POOL", - "LISTENER", "LISTENER_QUEUE", "PROXY", "SERVER", - "UPDATED_SERVERS", "LBPRM", "SIGNALS", "STK_TABLE", "STK_SESS", - "APPLETS", "PEER", "BUF_WQ", "STREAMS", "SSL", "SSL_GEN_CERTS", - "PATREF", "PATEXP", "PATLRU", "VARS", "COMP_POOL", "LUA", - "NOTIF", "SPOE_APPLET", "DNS", "PID_LIST", "EMAIL_ALERTS", - "PIPES" }; int lbl; for (lbl = 0; lbl < LOCK_LABELS; lbl++) { @@ -341,7 +377,7 @@ static inline void show_lock_stats() "\t # read unlock : %lu (%ld)\n" "\t # wait time for read : %.3f msec\n" "\t # wait time for read/lock : %.3f nsec\n", - labels[lbl], + lock_label(lbl), lock_stats[lbl].num_write_locked, lock_stats[lbl].num_write_unlocked, lock_stats[lbl].num_write_unlocked - lock_stats[lbl].num_write_locked, From dad5d4d4185b6392c0e866a1e6a4c297e2b28f18 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 31 Jan 2018 09:49:29 +0100 Subject: [PATCH 43/52] BUG/MINOR: epoll/threads: only call epoll_ctl(DEL) on polled FDs Commit d9e7e36 ("BUG/MEDIUM: epoll/threads: use one epoll_fd per thread") addressed an issue with the polling and required that cloned FDs are removed from all polling threads on close. But in fact it does it for all bound threads, some of which may not necessarily poll the FD. This is harmless, but it may also make it harder later to deal with FD migration between threads. Better use polled_mask which only reports threads still aware of the FD instead of thread_mask. This fix should be backported to 1.8. (cherry picked from commit 497959290789002b814b9021a737a3c5f14e7407) Signed-off-by: Willy Tarreau --- src/ev_epoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ev_epoll.c b/src/ev_epoll.c index e5c0001c9..1ea285366 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -50,7 +50,7 @@ static THREAD_LOCAL struct epoll_event ev; REGPRM1 static void __fd_clo(int fd) { if (unlikely(fdtab[fd].cloned)) { - unsigned long m = fdtab[fd].thread_mask; + unsigned long m = fdtab[fd].polled_mask; int i; for (i = global.nbthread - 1; i >= 0; i--) From f13f3a4babdb1ce23a7e982c765704bca728111a Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 1 Feb 2018 08:45:22 +0100 Subject: [PATCH 44/52] BUG/MEDIUM: spoe: Always try to receive or send the frame to detect shutdowns Before, we checked if the buffer was allocated or not to avoid sending or receiving a frame. This was done to not call ci_putblk or co_getblk if there is nothing to do. But the checks on the buffers are also done in these functions. So this is not mandatory here. But in these functions, the channel state is also checked, so an error is returned if it is closed. By skipping the call, we also skip the checks on the channel state, delaying shutdowns detection. Now, we always try to send or receive a frame. So if the corresponding channel is closed, we can immediatly handle the error. This patch must be backported in 1.8 (cherry picked from commit d5216d474d69856a282e4443f180af2093a80d6c) Signed-off-by: Willy Tarreau --- src/flt_spoe.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/flt_spoe.c b/src/flt_spoe.c index 6aeabb2fd..a343af1db 100644 --- a/src/flt_spoe.c +++ b/src/flt_spoe.c @@ -1135,17 +1135,13 @@ spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz) int ret; uint32_t netint; - if (si_ic(si)->buf == &buf_empty) - goto retry; - /* 4 bytes are reserved at the beginning of to store the frame * length. */ netint = htonl(framesz); memcpy(buf, (char *)&netint, 4); ret = ci_putblk(si_ic(si), buf, framesz+4); - if (ret <= 0) { - if (ret == -1) { + if ((ret == -3 && si_ic(si)->buf == &buf_empty) || ret == -1) { retry: si_applet_cant_put(si); return 1; /* retry */ @@ -1166,9 +1162,6 @@ spoe_recv_frame(struct appctx *appctx, char *buf, size_t framesz) int ret; uint32_t netint; - if (si_oc(si)->buf == &buf_empty) - goto retry; - ret = co_getblk(si_oc(si), (char *)&netint, 4, 0); if (ret > 0) { framesz = ntohl(netint); From bd9c62bc193678d44988eebb6ffa23b24d81652f Mon Sep 17 00:00:00 2001 From: Christopher Faulet Date: Thu, 1 Feb 2018 08:45:45 +0100 Subject: [PATCH 45/52] BUG/MEDIUM: spoe: Allow producer to read and to forward shutdown on request side This is mandatory to correctly set right timeout on the stream. Else the client timeout is never set. So only SPOE processing timeout will be evaluated. If it is not defined (ie infinity), the stream can be blocked for a while, waiting the SPOA reply. Of course, this is not a good idea to let the SPOE processing timeout undefined, but it can happen. This patch must be backported in 1.8. (cherry picked from commit 9cdca976d325cfc69a085b7d9e53ee1789a18a5b) Signed-off-by: Willy Tarreau --- src/flt_spoe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flt_spoe.c b/src/flt_spoe.c index a343af1db..8fb6e0b71 100644 --- a/src/flt_spoe.c +++ b/src/flt_spoe.c @@ -3085,7 +3085,7 @@ spoe_chn_pre_analyze(struct stream *s, struct filter *filter, } out: - if (!ret) { + if (!ret && (chn->flags & CF_ISRESP)) { channel_dont_read(chn); channel_dont_close(chn); } From 939124768d25723d6cf40d6182c1ddcb5f34fa19 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 5 Feb 2018 20:11:38 +0100 Subject: [PATCH 46/52] BUG/MINOR: time/threads: ensure the adjusted time is always correct In the time offset calculation loop, we ensure we only commit the new date once it's futher in the future than the current one. However there is a small issue here on 32-bit platforms : if global_now is written in two cycles by another thread, starting with the tv_sec part, and the current thread reads it in the middle of a change, it may compute a wrong "adjusted" value on the first round, with the new (larger) tv_sec and the old (large) tv_usec. This will be detected as the CAS will fail, and another attempt will be made, but this time possibly with too large an adusted value, pushing the date further than needed (at worst almost one second). This patch addresses this by using a temporary adjusted time in the loop that always restarts from the last known one, and by assigning the result to the final value only once the CAS succeeds. The impact is very limited, it may cause the time to advance in small jumps on 32 bit platforms and in the worst case some timeouts might expire 1 second too early. This fix should be backported to 1.8. (cherry picked from commit a331544c33d287471ba0afaad5de5289f3cc35ea) Signed-off-by: Willy Tarreau --- src/time.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/time.c b/src/time.c index 789b47c1a..0de69627c 100644 --- a/src/time.c +++ b/src/time.c @@ -173,7 +173,7 @@ REGPRM2 int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2) */ REGPRM2 void tv_update_date(int max_wait, int interrupted) { - struct timeval adjusted, deadline, tmp_now; + struct timeval adjusted, deadline, tmp_now, tmp_adj; unsigned int curr_sec_ms; /* millisecond of current second (0..999) */ unsigned long long old_now; unsigned long long new_now; @@ -215,18 +215,21 @@ REGPRM2 void tv_update_date(int max_wait, int interrupted) do { tmp_now.tv_sec = (unsigned int)(old_now >> 32); tmp_now.tv_usec = old_now & 0xFFFFFFFFU; + tmp_adj = adjusted; - if (__tv_islt(&adjusted, &tmp_now)) - adjusted = tmp_now; + if (__tv_islt(&tmp_adj, &tmp_now)) + tmp_adj = tmp_now; /* now is expected to be the most accurate date, * equal to or newer. */ - new_now = (((unsigned long long)adjusted.tv_sec) << 32) + (unsigned int)adjusted.tv_usec; + new_now = (((unsigned long long)tmp_adj.tv_sec) << 32) + (unsigned int)tmp_adj.tv_usec; /* let's try to update the global or loop again */ } while (!HA_ATOMIC_CAS(&global_now, &old_now, new_now)); + adjusted = tmp_adj; + /* the new global date when we looked was old_now, and the new one is * new_now == adjusted. We can recompute our local offset. */ From fb4921724172f89aa1d3309688aa793112839fe1 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus Date: Sun, 21 Jan 2018 22:11:17 +0100 Subject: [PATCH 47/52] BUG/MEDIUM: standard: Fix memory leak in str2ip2() An haproxy compiled with: > make -j4 all TARGET=linux2628 USE_GETADDRINFO=1 And running with a configuration like this: defaults log global mode http option httplog option dontlognull timeout connect 5000 timeout client 50000 timeout server 50000 frontend fe bind :::8080 v4v6 default_backend be backend be server s example.com:80 check Will leak memory inside `str2ip2()`, because the list `result` is not properly freed in success cases: ==18875== 140 (76 direct, 64 indirect) bytes in 1 blocks are definitely lost in loss record 87 of 111 ==18875== at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==18875== by 0x537A565: gaih_inet (getaddrinfo.c:1223) ==18875== by 0x537DD5D: getaddrinfo (getaddrinfo.c:2425) ==18875== by 0x4868E5: str2ip2 (standard.c:733) ==18875== by 0x43F28B: srv_set_addr_via_libc (server.c:3767) ==18875== by 0x43F50A: srv_iterate_initaddr (server.c:3879) ==18875== by 0x43F50A: srv_init_addr (server.c:3944) ==18875== by 0x475B30: init (haproxy.c:1595) ==18875== by 0x40406D: main (haproxy.c:2479) The exists as long as the usage of getaddrinfo in that function exists, it was introduced in commit: d5f4328efd5f4eaa7c89cad9773124959195430a v1.5-dev8 is the first tag containing this comment, the fix should be backported to haproxy 1.5 and newer. (cherry picked from commit 7d58b4d156fe159775e240a73aaad1bb76075af5) Signed-off-by: Willy Tarreau --- src/standard.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/standard.c b/src/standard.c index acd136c27..4f89a9217 100644 --- a/src/standard.c +++ b/src/standard.c @@ -722,6 +722,7 @@ struct sockaddr_storage *str2ip2(const char *str, struct sockaddr_storage *sa, i #ifdef USE_GETADDRINFO if (global.tune.options & GTUNE_USE_GAI) { struct addrinfo hints, *result; + int success = 0; memset(&result, 0, sizeof(result)); memset(&hints, 0, sizeof(hints)); @@ -733,23 +734,30 @@ struct sockaddr_storage *str2ip2(const char *str, struct sockaddr_storage *sa, i if (getaddrinfo(str, NULL, &hints, &result) == 0) { if (!sa->ss_family || sa->ss_family == AF_UNSPEC) sa->ss_family = result->ai_family; - else if (sa->ss_family != result->ai_family) + else if (sa->ss_family != result->ai_family) { + freeaddrinfo(result); goto fail; + } switch (result->ai_family) { case AF_INET: memcpy((struct sockaddr_in *)sa, result->ai_addr, result->ai_addrlen); set_host_port(sa, port); - return sa; + success = 1; + break; case AF_INET6: memcpy((struct sockaddr_in6 *)sa, result->ai_addr, result->ai_addrlen); set_host_port(sa, port); - return sa; + success = 1; + break; } } if (result) freeaddrinfo(result); + + if (success) + return sa; } #endif /* try to resolve an IPv4/IPv6 hostname */ From b6649ffcfcea9ee1f623ddd6d4c485532a450eb1 Mon Sep 17 00:00:00 2001 From: Chris Lane Date: Mon, 5 Feb 2018 23:15:44 +0000 Subject: [PATCH 48/52] MINOR: init: emit warning when -sf/-sd cannot parse argument Previously, -sf and -sd command line parsing used atol which cannot detect errors. I had a problem where I was doing -sf "$pid1 $pid2 $pid" and it was sending the gracefully terminate signal only to the first pid. The change uses strtol and checks endptr and errno to see if the parsing worked. It will exit when the pid list is not parsed. [wt: this should be backported to 1.8] (cherry picked from commit 236062f7cea355bae9bbb6f5cd1953e78f36c6d8) Signed-off-by: Willy Tarreau --- src/haproxy.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/haproxy.c b/src/haproxy.c index 34e8f7549..c2cf632da 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -1445,13 +1445,27 @@ static void init(int argc, char **argv) else oldpids_sig = SIGTERM; /* terminate immediately */ while (argc > 1 && argv[1][0] != '-') { + char * endptr = NULL; oldpids = realloc(oldpids, (nb_oldpids + 1) * sizeof(int)); if (!oldpids) { ha_alert("Cannot allocate old pid : out of memory.\n"); exit(1); } argc--; argv++; - oldpids[nb_oldpids] = atol(*argv); + errno = 0; + oldpids[nb_oldpids] = strtol(*argv, &endptr, 10); + if (errno) { + ha_alert("-%2s option: failed to parse {%s}: %s\n", + flag, + *argv, strerror(errno)); + exit(1); + } else if (endptr && strlen(endptr)) { + while (isspace(*endptr)) endptr++; + if (*endptr != 0) + ha_alert("-%2s option: some bytes unconsumed in PID list {%s}\n", + flag, endptr); + exit(1); + } if (oldpids[nb_oldpids] <= 0) usage(progname); nb_oldpids++; From ec91da75e8ee9785551d2eb31e7c84cc8e6217a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=E9r=F4me=20Magnin?= Date: Wed, 7 Feb 2018 11:39:58 +0100 Subject: [PATCH 49/52] DOC: Describe routing impact of using interface keyword on bind lines (cherry picked from commit 61275198b30a505df7824ba9081d45d5d6d07f81) Signed-off-by: Willy Tarreau --- doc/configuration.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 208ed523a..eda780e1a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -10768,7 +10768,12 @@ interface interface, not an aliased interface. It is also possible to bind multiple frontends to the same address if they are bound to different interfaces. Note that binding to a network interface requires root privileges. This parameter - is only compatible with TCPv4/TCPv6 sockets. + is only compatible with TCPv4/TCPv6 sockets. When specified, return traffic + uses the same interface as inbound traffic, and its associated routing table, + even if there are explicit routes through different interfaces configured. + This can prove useful to address asymmetric routing issues when the same + client IP addresses need to be able to reach frontends hosted on different + interfaces. level This setting is used with the stats sockets only to restrict the nature of From 27e631c46b19228a401572c9478a6515da46d3d2 Mon Sep 17 00:00:00 2001 From: Pavlos Parissis Date: Wed, 7 Feb 2018 21:42:16 +0100 Subject: [PATCH 50/52] DOC: Mention -Ws in the list of available options (cherry picked from commit f65f257871907f831558b1fab626d871fd48e984) Signed-off-by: Willy Tarreau --- doc/management.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/management.txt b/doc/management.txt index 8cec7ea15..68a17c256 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -176,6 +176,10 @@ list of options is : is compatible either with the foreground or daemon mode. It is recommended to use this mode with multiprocess and systemd. + -Ws : master-worker mode with support of `notify` type of systemd service. + This option is only available when HAProxy was built with `USE_SYSTEMD` + build option enabled. + -c : only performs a check of the configuration files and exits before trying to bind. The exit status is zero if everything is OK, or non-zero if an error is encountered. From 021fa041313c45913e59846ffbb366e1405116ad Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 8 Feb 2018 09:55:09 +0100 Subject: [PATCH 51/52] BUG/MINOR: config: don't emit a warning when global stats is incompletely configured Martin Brauer reported an unexpected warning when some parts of the global stats are defined but not the listening address, like below : global #stats socket run/admin.sock mode 660 level admin stats timeout 30s Then haproxy complains : [WARNING] 334/150131 (23086) : config : frontend 'GLOBAL' has no 'bind' directive. Please declare it as a backend if this was intended. This is because of the check for a bind-less frontend (the global section creates a frontend for the stats). There's no clean fix for this one, so here we're simply checking that the frontend is not the global stats one before emitting the warning. This patch should be backported to all stable versions. (cherry picked from commit 58aa5ccd7675e5c960b045e96fdf69845d6449b4) Signed-off-by: Willy Tarreau --- src/cfgparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cfgparse.c b/src/cfgparse.c index 567f3acbf..23c963cb9 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -7681,7 +7681,7 @@ int check_config_validity() break; } - if ((curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->conf.listeners)) { + if (curproxy != global.stats_fe && (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->conf.listeners)) { ha_warning("config : %s '%s' has no 'bind' directive. Please declare it as a backend if this was intended.\n", proxy_type_str(curproxy), curproxy->id); err_code |= ERR_WARN; From 1deb90d5243a5cfa5da7592978592eb9ab2c8c6f Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 8 Feb 2018 14:05:15 +0100 Subject: [PATCH 52/52] [RELEASE] Released version 1.8.4 Released version 1.8.4 with the following main changes : - BUG/MEDIUM: h2: properly handle the END_STREAM flag on empty DATA frames - BUILD: ssl: silence a warning when building without NPN nor ALPN support - BUG/MEDIUM: ssl: cache doesn't release shctx blocks - BUG/MINOR: lua: Fix default value for pattern in Socket.receive - DOC: lua: Fix typos in comments of hlua_socket_receive - BUG/MEDIUM: lua: Fix IPv6 with separate port support for Socket.connect - BUG/MINOR: lua: Fix return value of Socket.settimeout - MINOR: dns: Handle SRV record weight correctly. - BUG/MEDIUM: mworker: execvp failure depending on argv[0] - MINOR: hathreads: add support for gcc < 4.7 - BUILD/MINOR: ancient gcc versions atomic fix - BUG/MEDIUM: stream: properly handle client aborts during redispatch - DOC: clarify the scope of ssl_fc_is_resumed - CONTRIB: debug: fix a few flags definitions - BUG/MINOR: poll: too large size allocation for FD events - BUG/MEDIUM: peers: fix expire date wasn't updated if entry is modified remotely. - MINOR: servers: Don't report duplicate dyncookies for disabled servers. - MINOR: global/threads: move cpu_map at the end of the global struct - MINOR: threads: add a MAX_THREADS define instead of LONGBITS - MINOR: global: add some global activity counters to help debugging - MINOR: threads/fd: Use a bitfield to know if there are FDs for a thread in the FD cache - BUG/MEDIUM: threads/polling: Use fd_cache_mask instead of fd_cache_num - BUG/MEDIUM: fd: maintain a per-thread update mask - MINOR: fd: add a bitmask to indicate that an FD is known by the poller - BUG/MEDIUM: epoll/threads: use one epoll_fd per thread - BUG/MEDIUM: kqueue/threads: use one kqueue_fd per thread - BUG/MEDIUM: threads/mworker: fix a race on startup - BUG/MINOR: mworker: only write to pidfile if it exists - MINOR: threads: Fix build when we're not compiling with threads. - BUG/MINOR: threads: always set an owner to the thread_sync pipe - BUG/MEDIUM: threads/server: Fix deadlock in srv_set_stopping/srv_set_admin_flag - BUG/MEDIUM: checks: Don't try to release undefined conn_stream when a check is freed - BUG/MINOR: kqueue/threads: Don't forget to close kqueue_fd[tid] on each thread - MINOR: threads: Use __decl_hathreads instead of #ifdef/#endif - BUILD: epoll/threads: Add test on MAX_THREADS to avoid warnings when complied without threads - BUILD: kqueue/threads: Add test on MAX_THREADS to avoid warnings when complied without threads - CLEANUP: sample: Fix comment encoding of sample.c - CLEANUP: sample: Fix outdated comment about sample casts functions - BUG/MINOR: sample: Fix output type of c_ipv62ip - CLEANUP: Fix typo in ARGT_MSK6 comment - BUG/MINOR: cli: use global.maxsock and not maxfd to list all FDs - BUG/MINOR: threads: Update labels array because of changes in lock_label enum - BUG/MINOR: epoll/threads: only call epoll_ctl(DEL) on polled FDs - BUG/MEDIUM: spoe: Always try to receive or send the frame to detect shutdowns - BUG/MEDIUM: spoe: Allow producer to read and to forward shutdown on request side - BUG/MINOR: time/threads: ensure the adjusted time is always correct - BUG/MEDIUM: standard: Fix memory leak in str2ip2() - MINOR: init: emit warning when -sf/-sd cannot parse argument - DOC: Describe routing impact of using interface keyword on bind lines - DOC: Mention -Ws in the list of available options - BUG/MINOR: config: don't emit a warning when global stats is incompletely configured --- CHANGELOG | 53 +++++++++++++++++++++++++++++++++++++++++++ README | 2 +- VERDATE | 2 +- VERSION | 2 +- doc/configuration.txt | 2 +- examples/haproxy.spec | 5 +++- src/haproxy.c | 4 ++-- 7 files changed, 63 insertions(+), 7 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 8e7849a34..3d87520e6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,59 @@ ChangeLog : =========== +2018/02/08 : 1.8.4 + - BUG/MEDIUM: h2: properly handle the END_STREAM flag on empty DATA frames + - BUILD: ssl: silence a warning when building without NPN nor ALPN support + - BUG/MEDIUM: ssl: cache doesn't release shctx blocks + - BUG/MINOR: lua: Fix default value for pattern in Socket.receive + - DOC: lua: Fix typos in comments of hlua_socket_receive + - BUG/MEDIUM: lua: Fix IPv6 with separate port support for Socket.connect + - BUG/MINOR: lua: Fix return value of Socket.settimeout + - MINOR: dns: Handle SRV record weight correctly. + - BUG/MEDIUM: mworker: execvp failure depending on argv[0] + - MINOR: hathreads: add support for gcc < 4.7 + - BUILD/MINOR: ancient gcc versions atomic fix + - BUG/MEDIUM: stream: properly handle client aborts during redispatch + - DOC: clarify the scope of ssl_fc_is_resumed + - CONTRIB: debug: fix a few flags definitions + - BUG/MINOR: poll: too large size allocation for FD events + - BUG/MEDIUM: peers: fix expire date wasn't updated if entry is modified remotely. + - MINOR: servers: Don't report duplicate dyncookies for disabled servers. + - MINOR: global/threads: move cpu_map at the end of the global struct + - MINOR: threads: add a MAX_THREADS define instead of LONGBITS + - MINOR: global: add some global activity counters to help debugging + - MINOR: threads/fd: Use a bitfield to know if there are FDs for a thread in the FD cache + - BUG/MEDIUM: threads/polling: Use fd_cache_mask instead of fd_cache_num + - BUG/MEDIUM: fd: maintain a per-thread update mask + - MINOR: fd: add a bitmask to indicate that an FD is known by the poller + - BUG/MEDIUM: epoll/threads: use one epoll_fd per thread + - BUG/MEDIUM: kqueue/threads: use one kqueue_fd per thread + - BUG/MEDIUM: threads/mworker: fix a race on startup + - BUG/MINOR: mworker: only write to pidfile if it exists + - MINOR: threads: Fix build when we're not compiling with threads. + - BUG/MINOR: threads: always set an owner to the thread_sync pipe + - BUG/MEDIUM: threads/server: Fix deadlock in srv_set_stopping/srv_set_admin_flag + - BUG/MEDIUM: checks: Don't try to release undefined conn_stream when a check is freed + - BUG/MINOR: kqueue/threads: Don't forget to close kqueue_fd[tid] on each thread + - MINOR: threads: Use __decl_hathreads instead of #ifdef/#endif + - BUILD: epoll/threads: Add test on MAX_THREADS to avoid warnings when complied without threads + - BUILD: kqueue/threads: Add test on MAX_THREADS to avoid warnings when complied without threads + - CLEANUP: sample: Fix comment encoding of sample.c + - CLEANUP: sample: Fix outdated comment about sample casts functions + - BUG/MINOR: sample: Fix output type of c_ipv62ip + - CLEANUP: Fix typo in ARGT_MSK6 comment + - BUG/MINOR: cli: use global.maxsock and not maxfd to list all FDs + - BUG/MINOR: threads: Update labels array because of changes in lock_label enum + - BUG/MINOR: epoll/threads: only call epoll_ctl(DEL) on polled FDs + - BUG/MEDIUM: spoe: Always try to receive or send the frame to detect shutdowns + - BUG/MEDIUM: spoe: Allow producer to read and to forward shutdown on request side + - BUG/MINOR: time/threads: ensure the adjusted time is always correct + - BUG/MEDIUM: standard: Fix memory leak in str2ip2() + - MINOR: init: emit warning when -sf/-sd cannot parse argument + - DOC: Describe routing impact of using interface keyword on bind lines + - DOC: Mention -Ws in the list of available options + - BUG/MINOR: config: don't emit a warning when global stats is incompletely configured + 2017/12/30 : 1.8.3 - BUG/MEDIUM: h2: properly handle and report some stream errors - BUG/MEDIUM: h2: improve handling of frames received on closed streams diff --git a/README b/README index 24f770ffe..0c2c4b1e8 100644 --- a/README +++ b/README @@ -3,7 +3,7 @@ ---------------------- version 1.8 willy tarreau - 2017/12/30 + 2018/02/08 1) How to build it diff --git a/VERDATE b/VERDATE index b54cf31fb..ea32c8f0c 100644 --- a/VERDATE +++ b/VERDATE @@ -1,2 +1,2 @@ $Format:%ci$ -2017/12/30 +2018/02/08 diff --git a/VERSION b/VERSION index a7ee35a3e..bfa363e76 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.8.3 +1.8.4 diff --git a/doc/configuration.txt b/doc/configuration.txt index eda780e1a..36968860a 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -4,7 +4,7 @@ ---------------------- version 1.8 willy tarreau - 2017/12/30 + 2018/02/08 This document covers the configuration language as implemented in the version diff --git a/examples/haproxy.spec b/examples/haproxy.spec index 67ab70fc9..d259dfe21 100644 --- a/examples/haproxy.spec +++ b/examples/haproxy.spec @@ -1,6 +1,6 @@ Summary: HA-Proxy is a TCP/HTTP reverse proxy for high availability environments Name: haproxy -Version: 1.8.3 +Version: 1.8.4 Release: 1 License: GPL Group: System Environment/Daemons @@ -74,6 +74,9 @@ fi %attr(0755,root,root) %config %{_sysconfdir}/rc.d/init.d/%{name} %changelog +* Thu Feb 8 2018 Willy Tarreau +- updated to 1.8.4 + * Sat Dec 30 2017 Willy Tarreau - updated to 1.8.3 diff --git a/src/haproxy.c b/src/haproxy.c index c2cf632da..8aa7eeda5 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -1,6 +1,6 @@ /* * HA-Proxy : High Availability-enabled HTTP/TCP proxy - * Copyright 2000-2017 Willy Tarreau . + * Copyright 2000-2018 Willy Tarreau . * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -354,7 +354,7 @@ void hap_register_per_thread_deinit(void (*fct)()) static void display_version() { printf("HA-Proxy version " HAPROXY_VERSION " " HAPROXY_DATE"\n"); - printf("Copyright 2000-2017 Willy Tarreau \n\n"); + printf("Copyright 2000-2018 Willy Tarreau \n\n"); } static void display_build_opts()