mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-01-03 01:17:45 +03:00
main: bump fs.nr_open + fs.max-file to their largest possible values
After discussions with kernel folks, a system with memcg really shouldn't need extra hard limits on file descriptors anymore, as they are properly accounted for by memcg anyway. Hence, let's bump these values to their maximums. This also adds a build time option to turn thiss off, to cover those users who do not want to use memcg.
This commit is contained in:
parent
52d363e32e
commit
a8b627aaed
11
NEWS
11
NEWS
@ -52,6 +52,17 @@ CHANGES WITH 240 in spe:
|
||||
anymore (and neither can any shared library they use — or any shared
|
||||
library used by any shared library they use and so on).
|
||||
|
||||
* The fs.nr_open and fs.file-max sysctls are now automatically bumped
|
||||
to the highest possible values, as separate accounting of file
|
||||
descriptors is no longer necessary, as memcg tracks them correctly as
|
||||
part of the memory accounting anyway. Thus, from the four limits on
|
||||
file descriptors currently enforced (fs.file-max, fs.nr_open,
|
||||
RLIMIT_NOFILE hard, RLIMIT_NOFILE soft) we turn off the first two,
|
||||
and keep only the latter two. A set of build-time options
|
||||
(-Dbump-proc-sys-fs-file-max=no and -Dbump-proc-sys-fs-nr-open=no)
|
||||
has been added to revert this change in behaviour, which might be
|
||||
an option for systems that turn off memcg in the kernel.
|
||||
|
||||
CHANGES WITH 239:
|
||||
|
||||
* NETWORK INTERFACE DEVICE NAMING CHANGES: systemd-udevd's "net_id"
|
||||
|
@ -73,6 +73,9 @@ sysvrcnd_path = get_option('sysvrcnd-path')
|
||||
conf.set10('HAVE_SYSV_COMPAT', sysvinit_path != '' and sysvrcnd_path != '',
|
||||
description : 'SysV init scripts and rcN.d links are supported')
|
||||
|
||||
conf.set10('BUMP_PROC_SYS_FS_FILE_MAX', get_option('bump-proc-sys-fs-file-max'))
|
||||
conf.set10('BUMP_PROC_SYS_FS_NR_OPEN', get_option('bump-proc-sys-fs-nr-open'))
|
||||
|
||||
# join_paths ignore the preceding arguments if an absolute component is
|
||||
# encountered, so this should canonicalize various paths when they are
|
||||
# absolute or relative.
|
||||
|
@ -49,6 +49,10 @@ option('debug-extra', type : 'array', choices : ['hashmap', 'mmap-cache'], value
|
||||
description : 'enable extra debugging')
|
||||
option('memory-accounting-default', type : 'boolean',
|
||||
description : 'enable MemoryAccounting= by default')
|
||||
option('bump-proc-sys-fs-file-max', type : 'boolean',
|
||||
description : 'bump /proc/sys/fs/file-max to ULONG_MAX')
|
||||
option('bump-proc-sys-fs-nr-open', type : 'boolean',
|
||||
description : 'bump /proc/sys/fs/nr_open to INT_MAX')
|
||||
option('valgrind', type : 'boolean', value : false,
|
||||
description : 'do extra operations to avoid valgrind warnings')
|
||||
option('log-trace', type : 'boolean', value : false,
|
||||
|
@ -73,6 +73,7 @@
|
||||
#include "stdio-util.h"
|
||||
#include "strv.h"
|
||||
#include "switch-root.h"
|
||||
#include "sysctl-util.h"
|
||||
#include "terminal-util.h"
|
||||
#include "umask-util.h"
|
||||
#include "user-util.h"
|
||||
@ -1162,6 +1163,88 @@ static int prepare_reexecute(
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void bump_file_max_and_nr_open(void) {
|
||||
|
||||
/* Let's bump fs.file-max and fs.nr_open to their respective maximums. On current kernels large numbers of file
|
||||
* descriptors are no longer a performance problem and their memory is properly tracked by memcg, thus counting
|
||||
* them and limiting them in another two layers of limits is unnecessary and just complicates things. This
|
||||
* function hence turns off 2 of the 4 levels of limits on file descriptors, and makes RLIMIT_NOLIMIT (soft +
|
||||
* hard) the only ones that really matter. */
|
||||
|
||||
#if BUMP_PROC_SYS_FS_FILE_MAX || BUMP_PROC_SYS_FS_NR_OPEN
|
||||
_cleanup_free_ char *t = NULL;
|
||||
int r;
|
||||
#endif
|
||||
|
||||
#if BUMP_PROC_SYS_FS_FILE_MAX
|
||||
/* I so wanted to use STRINGIFY(ULONG_MAX) here, but alas we can't as glibc/gcc define that as
|
||||
* "(0x7fffffffffffffffL * 2UL + 1UL)". Seriously. 😢 */
|
||||
if (asprintf(&t, "%lu\n", ULONG_MAX) < 0) {
|
||||
log_oom();
|
||||
return;
|
||||
}
|
||||
|
||||
r = sysctl_write("fs/file-max", t);
|
||||
if (r < 0)
|
||||
log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.file-max, ignoring: %m");
|
||||
#endif
|
||||
|
||||
#if BUMP_PROC_SYS_FS_FILE_MAX && BUMP_PROC_SYS_FS_NR_OPEN
|
||||
t = mfree(t);
|
||||
#endif
|
||||
|
||||
#if BUMP_PROC_SYS_FS_NR_OPEN
|
||||
int v = INT_MAX;
|
||||
|
||||
/* Arg! The kernel enforces maximum and minimum values on the fs.nr_open, but we don't really know what they
|
||||
* are. The expression by which the maximum is determined is dependent on the architecture, and is something we
|
||||
* don't really want to copy to userspace, as it is dependent on implementation details of the kernel. Since
|
||||
* the kernel doesn't expose the maximum value to us, we can only try and hope. Hence, let's start with
|
||||
* INT_MAX, and then keep halving the value until we find one that works. Ugly? Yes, absolutely, but kernel
|
||||
* APIs are kernel APIs, so what do can we do... 🤯 */
|
||||
|
||||
for (;;) {
|
||||
int k;
|
||||
|
||||
v &= ~(__SIZEOF_POINTER__ - 1); /* Round down to next multiple of the pointer size */
|
||||
if (v < 1024) {
|
||||
log_warning("Can't bump fs.nr_open, value too small.");
|
||||
break;
|
||||
}
|
||||
|
||||
k = read_nr_open();
|
||||
if (k < 0) {
|
||||
log_error_errno(k, "Failed to read fs.nr_open: %m");
|
||||
break;
|
||||
}
|
||||
if (k >= v) { /* Already larger */
|
||||
log_debug("Skipping bump, value is already larger.");
|
||||
break;
|
||||
}
|
||||
|
||||
if (asprintf(&t, "%i\n", v) < 0) {
|
||||
log_oom();
|
||||
return;
|
||||
}
|
||||
|
||||
r = sysctl_write("fs/nr_open", t);
|
||||
t = mfree(t);
|
||||
if (r == -EINVAL) {
|
||||
log_debug("Couldn't write fs.nr_open as %i, halving it.", v);
|
||||
v /= 2;
|
||||
continue;
|
||||
}
|
||||
if (r < 0) {
|
||||
log_full_errno(IN_SET(r, -EROFS, -EPERM, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, "Failed to bump fs.nr_open, ignoring: %m");
|
||||
break;
|
||||
}
|
||||
|
||||
log_debug("Successfully bumped fs.nr_open to %i", v);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
|
||||
int r, nr;
|
||||
|
||||
@ -1883,6 +1966,7 @@ static int initialize_runtime(
|
||||
machine_id_setup(NULL, arg_machine_id, NULL);
|
||||
loopback_setup();
|
||||
bump_unix_max_dgram_qlen();
|
||||
bump_file_max_and_nr_open();
|
||||
test_usr();
|
||||
write_container_id();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user