Use FD_CLOEXEC in Sentinel, so that FDs don't leak to the scripts it runs (#8242)
Sentinel uses execve to run scripts, so it needs to use FD_CLOEXEC on all file descriptors, so that they're not accessible by the script it runs. This commit includes a change to the sentinel tests, which verifies no FDs are left opened when the script is executed.
This commit is contained in:
parent
aaf71b380e
commit
fb66e2e249
1
src/ae.c
1
src/ae.c
@ -31,6 +31,7 @@
|
||||
*/
|
||||
|
||||
#include "ae.h"
|
||||
#include "anet.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <sys/time.h>
|
||||
|
@ -51,6 +51,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
|
||||
zfree(state);
|
||||
return -1;
|
||||
}
|
||||
anetCloexec(state->epfd);
|
||||
eventLoop->apidata = state;
|
||||
return 0;
|
||||
}
|
||||
|
@ -82,6 +82,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
|
||||
zfree(state);
|
||||
return -1;
|
||||
}
|
||||
anetCloexec(state->portfd);
|
||||
|
||||
state->npending = 0;
|
||||
|
||||
|
@ -53,6 +53,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
|
||||
zfree(state);
|
||||
return -1;
|
||||
}
|
||||
anetCloexec(state->kqfd);
|
||||
eventLoop->apidata = state;
|
||||
return 0;
|
||||
}
|
||||
|
23
src/anet.c
23
src/anet.c
@ -94,6 +94,29 @@ int anetBlock(char *err, int fd) {
|
||||
return anetSetBlock(err,fd,0);
|
||||
}
|
||||
|
||||
/* Enable the FD_CLOEXEC on the given fd to avoid fd leaks.
|
||||
* This function should be invoked for fd's on specific places
|
||||
* where fork + execve system calls are called. */
|
||||
int anetCloexec(int fd) {
|
||||
int r;
|
||||
int flags;
|
||||
|
||||
do {
|
||||
r = fcntl(fd, F_GETFD);
|
||||
} while (r == -1 && errno == EINTR);
|
||||
|
||||
if (r == -1 || (r & FD_CLOEXEC))
|
||||
return r;
|
||||
|
||||
flags = r | FD_CLOEXEC;
|
||||
|
||||
do {
|
||||
r = fcntl(fd, F_SETFD, flags);
|
||||
} while (r == -1 && errno == EINTR);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Set TCP keep alive option to detect dead peers. The interval option
|
||||
* is only used for Linux as we are using Linux-specific APIs to set
|
||||
* the probe send time, interval, and count. */
|
||||
|
@ -70,6 +70,7 @@ int anetUnixAccept(char *err, int serversock);
|
||||
int anetWrite(int fd, char *buf, int count);
|
||||
int anetNonBlock(char *err, int fd);
|
||||
int anetBlock(char *err, int fd);
|
||||
int anetCloexec(int fd);
|
||||
int anetEnableTcpNoDelay(char *err, int fd);
|
||||
int anetDisableTcpNoDelay(char *err, int fd);
|
||||
int anetTcpKeepAlive(char *err, int fd);
|
||||
|
@ -398,7 +398,7 @@ int clusterLockConfig(char *filename) {
|
||||
/* To lock it, we need to open the file in a way it is created if
|
||||
* it does not exist, otherwise there is a race condition with other
|
||||
* processes. */
|
||||
int fd = open(filename,O_WRONLY|O_CREAT,0644);
|
||||
int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644);
|
||||
if (fd == -1) {
|
||||
serverLog(LL_WARNING,
|
||||
"Can't open %s in order to acquire a lock: %s",
|
||||
|
@ -7670,6 +7670,11 @@ void moduleInitModulesSystem(void) {
|
||||
anetNonBlock(NULL,server.module_blocked_pipe[0]);
|
||||
anetNonBlock(NULL,server.module_blocked_pipe[1]);
|
||||
|
||||
/* Enable close-on-exec flag on pipes in case of the fork-exec system calls in
|
||||
* sentinels or redis servers. */
|
||||
anetCloexec(server.module_blocked_pipe[0]);
|
||||
anetCloexec(server.module_blocked_pipe[1]);
|
||||
|
||||
/* Create the timers radix tree. */
|
||||
Timers = raxNew();
|
||||
|
||||
|
@ -1104,6 +1104,7 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
|
||||
"Accepting client connection: %s", server.neterr);
|
||||
return;
|
||||
}
|
||||
anetCloexec(cfd);
|
||||
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
|
||||
acceptCommonHandler(connCreateAcceptedSocket(cfd),0,cip);
|
||||
}
|
||||
@ -1124,6 +1125,7 @@ void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
|
||||
"Accepting client connection: %s", server.neterr);
|
||||
return;
|
||||
}
|
||||
anetCloexec(cfd);
|
||||
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
|
||||
acceptCommonHandler(connCreateAcceptedTLS(cfd, server.tls_auth_clients),0,cip);
|
||||
}
|
||||
@ -1143,6 +1145,7 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
|
||||
"Accepting client connection: %s", server.neterr);
|
||||
return;
|
||||
}
|
||||
anetCloexec(cfd);
|
||||
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
|
||||
acceptCommonHandler(connCreateAcceptedSocket(cfd),CLIENT_UNIX_SOCKET,NULL);
|
||||
}
|
||||
|
@ -2135,6 +2135,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
|
||||
link->cc->errstr);
|
||||
instanceLinkCloseConnection(link,link->cc);
|
||||
} else {
|
||||
anetCloexec(link->cc->c.fd);
|
||||
link->pending_commands = 0;
|
||||
link->cc_conn_time = mstime();
|
||||
link->cc->data = link;
|
||||
@ -2162,7 +2163,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
|
||||
instanceLinkCloseConnection(link,link->pc);
|
||||
} else {
|
||||
int retval;
|
||||
|
||||
anetCloexec(link->pc->c.fd);
|
||||
link->pc_conn_time = mstime();
|
||||
link->pc->data = link;
|
||||
redisAeAttach(server.el,link->pc);
|
||||
|
@ -2957,6 +2957,7 @@ int listenToPort(int port, int *fds, int *count) {
|
||||
return C_ERR;
|
||||
}
|
||||
anetNonBlock(NULL,fds[*count]);
|
||||
anetCloexec(fds[*count]);
|
||||
(*count)++;
|
||||
}
|
||||
return C_OK;
|
||||
@ -3095,6 +3096,7 @@ void initServer(void) {
|
||||
exit(1);
|
||||
}
|
||||
anetNonBlock(NULL,server.sofd);
|
||||
anetCloexec(server.sofd);
|
||||
}
|
||||
|
||||
/* Abort if there are no listening sockets at all. */
|
||||
@ -5470,7 +5472,7 @@ void setupChildSignalHandlers(void) {
|
||||
* of the parent process, e.g. fd(socket or flock) etc.
|
||||
* should close the resources not used by the child process, so that if the
|
||||
* parent restarts it can bind/lock despite the child possibly still running. */
|
||||
void closeClildUnusedResourceAfterFork() {
|
||||
void closeChildUnusedResourceAfterFork() {
|
||||
closeListeningSockets(0);
|
||||
if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
|
||||
close(server.cluster_config_file_lock_fd); /* don't care if this fails */
|
||||
@ -5497,7 +5499,7 @@ int redisFork(int purpose) {
|
||||
server.in_fork_child = purpose;
|
||||
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
|
||||
setupChildSignalHandlers();
|
||||
closeClildUnusedResourceAfterFork();
|
||||
closeChildUnusedResourceAfterFork();
|
||||
} else {
|
||||
/* Parent */
|
||||
server.stat_total_forks++;
|
||||
|
@ -400,6 +400,11 @@ proc check_leaks instance_types {
|
||||
|
||||
# Execute all the units inside the 'tests' directory.
|
||||
proc run_tests {} {
|
||||
set sentinel_fd_leaks_file "sentinel_fd_leaks"
|
||||
if { [file exists $sentinel_fd_leaks_file] } {
|
||||
file delete $sentinel_fd_leaks_file
|
||||
}
|
||||
|
||||
set tests [lsort [glob ../tests/*]]
|
||||
foreach test $tests {
|
||||
if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
|
||||
@ -414,7 +419,14 @@ proc run_tests {} {
|
||||
|
||||
# Print a message and exists with 0 / 1 according to zero or more failures.
|
||||
proc end_tests {} {
|
||||
if {$::failed == 0} {
|
||||
set sentinel_fd_leaks_file "sentinel_fd_leaks"
|
||||
if { [file exists $sentinel_fd_leaks_file] } {
|
||||
puts [colorstr red "WARNING: sentinel test(s) failed, there are leaked fds in sentinel:"]
|
||||
exec cat $sentinel_fd_leaks_file
|
||||
exit 1
|
||||
}
|
||||
|
||||
if {$::failed == 0 } {
|
||||
puts "GOOD! No errors."
|
||||
exit 0
|
||||
} else {
|
||||
|
@ -10,7 +10,7 @@ set ::tlsdir "../../tls"
|
||||
|
||||
proc main {} {
|
||||
parse_options
|
||||
spawn_instance sentinel $::sentinel_base_port $::instances_count
|
||||
spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"]
|
||||
spawn_instance redis $::redis_base_port $::instances_count
|
||||
run_tests
|
||||
cleanup
|
||||
|
@ -37,6 +37,8 @@ test "(init) Sentinels can start monitoring a master" {
|
||||
S $id SENTINEL SET mymaster down-after-milliseconds 2000
|
||||
S $id SENTINEL SET mymaster failover-timeout 20000
|
||||
S $id SENTINEL SET mymaster parallel-syncs 10
|
||||
S $id SENTINEL SET mymaster notification-script ../../tests/includes/notify.sh
|
||||
S $id SENTINEL SET mymaster client-reconfig-script ../../tests/includes/notify.sh
|
||||
}
|
||||
}
|
||||
|
||||
|
20
tests/sentinel/tests/includes/notify.sh
Executable file
20
tests/sentinel/tests/includes/notify.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
OS=`uname -s`
|
||||
if [ ${OS} == "Linux" ]
|
||||
then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# fd 3 is meant to catch the actual access to /proc/pid/fd,
|
||||
# in case there's an fd leak by the sentinel,
|
||||
# it can take 3, but then the access to /proc will take another fd, and we'll catch that.
|
||||
leaked_fd_count=`ls /proc/self/fd | grep -vE '^[0|1|2|3]$' | wc -l`
|
||||
if [ $leaked_fd_count -gt 0 ]
|
||||
then
|
||||
sentinel_fd_leaks_file="../sentinel_fd_leaks"
|
||||
if [ ! -f $sentinel_fd_leaks_file ]
|
||||
then
|
||||
ls -l /proc/self/fd | cat >> $sentinel_fd_leaks_file
|
||||
fi
|
||||
fi
|
Loading…
x
Reference in New Issue
Block a user