Implement TCP Keep-Alives across most Unix-like systems (#12782)
## TCP Keep-Alives [TCP Keep-Alives](https://datatracker.ietf.org/doc/html/rfc9293#name-tcp-keep-alives) provide a way to detect whether a TCP connection is alive or dead, which can be useful for reducing system resource usage by cleaning up dead connections. There is full support of TCP Keep-Alives on Linux and partial support on macOS in `redis` at present. This PR intends to complete the rest. ## Unix-like OS support `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` are not included in the POSIX standard for `setsockopt`, but these three socket options are widely available on most Unix-like systems and Windows. ### References - [AIX](https://www.ibm.com/support/pages/ibm-aix-tcp-keepalive-probes) - [DragonflyBSD](https://man.dragonflybsd.org/?command=tcp&section=4) - [FreeBSD](https://www.freebsd.org/cgi/man.cgi?query=tcp) - [HP-UX](https://docstore.mik.ua/manuals/hp-ux/en/B2355-60130/TCP.7P.html) - [illumos](https://illumos.org/man/4P/tcp) - [Linux](https://man7.org/linux/man-pages/man7/tcp.7.html) - [NetBSD](https://man.netbsd.org/NetBSD-8.0/tcp.4) - [Windows](https://learn.microsoft.com/en-us/windows/win32/winsock/ipproto-tcp-socket-options) ### Mac OS In earlier versions, macOS only supported setting `TCP_KEEPALIVE` (the equivalent of `TCP_KEEPIDLE` on other platforms), but since macOS 10.8 it has supported `TCP_KEEPINTVL` and `TCP_KEEPCNT`. Check out [this mailing list](https://lists.apple.com/archives/macnetworkprog/2012/Jul/msg00005.html) and [the source code](https://github.com/apple/darwin-xnu/blob/main/bsd/netinet/tcp.h#L215-L230) for more details. ### Solaris Solaris claimed it supported the TCP Keep-Alives mechanism, but `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris until the latest version 11.4. Therefore, on older Solaris versions we need to simulate the TCP Keep-Alives mechanism found on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. 
- [Solaris 11.3](https://docs.oracle.com/cd/E86824_01/html/E54777/tcp-7p.html) - [Solaris 11.4](https://docs.oracle.com/cd/E88353_01/html/E37851/tcp-4p.html) --------- Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
parent
27a8e3b04e
commit
1aa633d61b
149
src/anet.c
149
src/anet.c
@ -130,57 +130,142 @@ int anetCloexec(int fd) {
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Set TCP keep alive option to detect dead peers. The interval option
|
||||
* is only used for Linux as we are using Linux-specific APIs to set
|
||||
* the probe send time, interval, and count. */
|
||||
/* Enable TCP keep-alive mechanism to detect dead peers,
|
||||
* TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT will be set accordingly. */
|
||||
int anetKeepAlive(char *err, int fd, int interval)
|
||||
{
|
||||
int val = 1;
|
||||
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1)
|
||||
int enabled = 1;
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, sizeof(enabled)))
|
||||
{
|
||||
anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
/* Default settings are more or less garbage, with the keepalive time
|
||||
* set to 7200 by default on Linux. Modify settings to make the feature
|
||||
* actually useful. */
|
||||
int idle;
|
||||
int intvl;
|
||||
int cnt;
|
||||
|
||||
/* Send first probe after interval. */
|
||||
val = interval;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
/* There are platforms that are expected to support the full mechanism of TCP keep-alive,
|
||||
* we want the compiler to emit warnings of unused variables if the preprocessor directives
|
||||
* somehow fail, and other than those platforms, just omit these warnings if they happen.
|
||||
*/
|
||||
#if !(defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \
|
||||
defined(__FreeBSD__) || defined(__illumos__) || defined(__linux__) || \
|
||||
defined(__NetBSD__) || defined(__sun))
|
||||
UNUSED(interval);
|
||||
UNUSED(idle);
|
||||
UNUSED(intvl);
|
||||
UNUSED(cnt);
|
||||
#endif
|
||||
|
||||
/* Send next probes after the specified interval. Note that we set the
|
||||
* delay as interval / 3, as we send three probes before detecting
|
||||
* an error (see the next setsockopt call). */
|
||||
val = interval/3;
|
||||
if (val == 0) val = 1;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) {
|
||||
/* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual
|
||||
* compared to other Unix-like systems.
|
||||
* Thus, we need to specialize it on Solaris. */
|
||||
#ifdef __sun
|
||||
/* There are two keep-alive mechanisms on Solaris:
|
||||
* - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours.
|
||||
* If the peer does not respond to the probe within eight minutes, the TCP connection is aborted.
|
||||
* You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD
|
||||
* in milliseconds or TCP_KEEPIDLE in seconds.
|
||||
* The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds.
|
||||
* The maximum is ten days, while the default is two hours. If you receive no response to the probe,
|
||||
* you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection.
|
||||
* The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and
|
||||
* abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval.
|
||||
* The default is eight minutes.
|
||||
|
||||
* - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set.
|
||||
* The time between consecutive probes is set by TCP_KEEPINTVL in seconds.
|
||||
* The minimum value is ten seconds. The maximum is ten days, while the default is two hours.
|
||||
* The TCP connection will be aborted after a certain number of probes, set by TCP_KEEPCNT, go unanswered.
|
||||
*/
|
||||
|
||||
idle = interval;
|
||||
if (idle < 10) idle = 10; // kernel expects at least 10 seconds
|
||||
if (idle > 10*24*60*60) idle = 10*24*60*60; // kernel expects at most 10 days
|
||||
|
||||
/* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris
|
||||
* until version 11.4, but let's take a chance here. */
|
||||
#if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT)
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
intvl = idle/3;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
cnt = 3;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
return ANET_OK;
|
||||
#endif
|
||||
|
||||
/* Fall back to the first implementation of tcp-alive mechanism for older Solaris,
|
||||
* simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`.
|
||||
*/
|
||||
idle *= 1000; // kernel expects milliseconds
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
|
||||
/* Consider the socket in error state after we send three ACK
|
||||
* probes without getting a reply. */
|
||||
val = 3;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) {
|
||||
/* Note that the consequent probes will not be sent at equal intervals on Solaris,
|
||||
* but will be sent using the exponential backoff algorithm. */
|
||||
intvl = idle/3;
|
||||
cnt = 3;
|
||||
int time_to_abort = intvl * cnt;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
/* Set idle time with interval */
|
||||
val = interval;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) {
|
||||
|
||||
return ANET_OK;
|
||||
#endif
|
||||
|
||||
#ifdef TCP_KEEPIDLE
|
||||
/* Default settings are more or less garbage, with the keepalive time
|
||||
* set to 7200 by default on Linux and other Unix-like systems.
|
||||
* Modify settings to make the feature actually useful. */
|
||||
|
||||
/* Send first probe after interval. */
|
||||
idle = interval;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
#elif defined(TCP_KEEPALIVE)
|
||||
/* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */
|
||||
idle = interval;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
#else
|
||||
((void) interval); /* Avoid unused var warning for non Linux systems. */
|
||||
#endif
|
||||
|
||||
#ifdef TCP_KEEPINTVL
|
||||
/* Send next probes after the specified interval. Note that we set the
|
||||
* delay as interval / 3, as we send three probes before detecting
|
||||
* an error (see the next setsockopt call). */
|
||||
intvl = interval/3;
|
||||
if (intvl == 0) intvl = 1;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef TCP_KEEPCNT
|
||||
/* Consider the socket in error state after we send three ACK
|
||||
* probes without getting a reply. */
|
||||
cnt = 3;
|
||||
if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) {
|
||||
anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno));
|
||||
return ANET_ERR;
|
||||
}
|
||||
#endif
|
||||
|
||||
return ANET_OK;
|
||||
|
Loading…
x
Reference in New Issue
Block a user