diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index dd6f17519d7..bede2226704 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -493,6 +493,8 @@ node /org/freedesktop/systemd1 { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s DefaultOOMPolicy = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly i DefaultOOMScoreAdjust = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s CtrlAltDelBurstAction = '...'; }; interface org.freedesktop.DBus.Peer { ... }; @@ -735,6 +737,8 @@ node /org/freedesktop/systemd1 { + + @@ -1131,6 +1135,8 @@ node /org/freedesktop/systemd1 { + + diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index 5824e01e0c8..ca36a64debd 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -450,6 +450,20 @@ for details. Note that this default is not used for services that have Delegate= turned on. + + + DefaultOOMScoreAdjust= + + Configures the default OOM score adjustments of processes run by the service + manager. This defaults to unset (meaning the forked off processes inherit the service manager's OOM + score adjustment value), except if the service manager is run for an unprivileged user, in which case + this defaults to the service manager's OOM adjustment value plus 100 (this makes service processes + slightly more likely to be killed under memory pressure than the manager itself). This may be used to + pick a global default for the per-unit OOMScoreAdjust= setting. See + systemd.exec5 for + details. Note that this setting has no effect on the OOM score adjustment value of the service + manager process itself, it retains the original value set during its invocation. 
+ diff --git a/src/basic/process-util.c b/src/basic/process-util.c index c424f62ef38..5e7ed06ea55 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1502,6 +1502,24 @@ int set_oom_score_adjust(int value) { WRITE_STRING_FILE_VERIFY_ON_FAILURE|WRITE_STRING_FILE_DISABLE_BUFFER); } +int get_oom_score_adjust(int *ret) { /* Reads the calling process' OOM score adjustment from /proc/self/oom_score_adj and, if ret is non-NULL, stores it there. Returns 0 on success, or the negative error from read_virtual_file(). */ + _cleanup_free_ char *t = NULL; /* must start out NULL: the cleanup handler also runs on the early error return below */ + int r, a; + + r = read_virtual_file("/proc/self/oom_score_adj", SIZE_MAX, &t, NULL); + if (r < 0) + return r; + + delete_trailing_chars(t, WHITESPACE); + + assert_se(safe_atoi(t, &a) >= 0); + assert_se(oom_score_adjust_is_valid(a)); + + if (ret) + *ret = a; + return 0; +} + int pidfd_get_pid(int fd, pid_t *ret) { char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; _cleanup_free_ char *fdinfo = NULL; diff --git a/src/basic/process-util.h b/src/basic/process-util.h index 551b236c011..7e87f5a17c7 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -175,6 +175,7 @@ static inline int safe_fork(const char *name, ForkFlags flags, pid_t *ret_pid) { int namespace_fork(const char *outer_name, const char *inner_name, int except_fds[], size_t n_except_fds, ForkFlags flags, int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd, pid_t *ret_pid); int set_oom_score_adjust(int value); +int get_oom_score_adjust(int *ret); /* The highest possibly (theoretic) pid_t value on this architecture.
*/ #define PID_T_MAX ((pid_t) INT32_MAX) diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 8f6042708cd..488de1242a4 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -104,8 +104,7 @@ static int property_get_oom_score_adjust( sd_bus_error *error) { ExecContext *c = userdata; - int32_t n; - int r; + int r, n; assert(bus); assert(reply); @@ -114,17 +113,10 @@ static int property_get_oom_score_adjust( if (c->oom_score_adjust_set) n = c->oom_score_adjust; else { - _cleanup_free_ char *t = NULL; - n = 0; - r = read_one_line_file("/proc/self/oom_score_adj", &t); + r = get_oom_score_adjust(&n); if (r < 0) log_debug_errno(r, "Failed to read /proc/self/oom_score_adj, ignoring: %m"); - else { - r = safe_atoi32(t, &n); - if (r < 0) - log_debug_errno(r, "Failed to parse \"%s\" from /proc/self/oom_score_adj, ignoring: %m", t); - } } return sd_bus_message_append(reply, "i", n); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index 1f2ac8152c2..99b75598b5a 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -31,6 +31,7 @@ #include "os-util.h" #include "parse-util.h" #include "path-util.h" +#include "process-util.h" #include "selinux-access.h" #include "stat-util.h" #include "string-util.h" @@ -358,6 +359,34 @@ static int property_set_kexec_watchdog( return property_set_watchdog(userdata, WATCHDOG_KEXEC, value); } +static int property_get_oom_score_adjust( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { /* Bus property getter for the manager-wide default OOM score adjustment: reports the configured default if one is set, otherwise the current process' own value (0 if that cannot be read). */ + + Manager *m = userdata; + int r, n; + + assert(m); + assert(bus); + assert(reply); + + if (m->default_oom_score_adjust_set) + n = m->default_oom_score_adjust; + else { + n = 0; /* reported as-is if the read below fails */ + r = get_oom_score_adjust(&n); + if (r < 0) + log_debug_errno(r, "Failed to read current OOM score adjustment value, ignoring: %m"); + } + + return sd_bus_message_append(reply, "i", n); +} + static int bus_get_unit_by_name(Manager
*m, sd_bus_message *message, const char *name, Unit **ret_unit, sd_bus_error *error) { Unit *u; int r; @@ -2722,6 +2751,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("DefaultTasksMax", "t", bus_property_get_tasks_max, offsetof(Manager, default_tasks_max), 0), SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("DefaultOOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CtrlAltDelBurstAction", "s", bus_property_get_emergency_action, offsetof(Manager, cad_burst_action), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_METHOD_WITH_NAMES("GetUnit", diff --git a/src/core/main.c b/src/core/main.c index ff7f189370d..059ba6dd493 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -160,6 +161,8 @@ static NUMAPolicy arg_numa_policy; static usec_t arg_clock_usec; static void *arg_random_seed; static size_t arg_random_seed_size; +static int arg_default_oom_score_adjust; +static bool arg_default_oom_score_adjust_set; /* A copy of the original environment block */ static char **saved_env = NULL; @@ -633,6 +636,37 @@ static int config_parse_default_timeout_abort( return 0; } +static int config_parse_oom_score_adjust( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { /* Config parser callback for the DefaultOOMScoreAdjust= setting: stores the parsed value in arg_default_oom_score_adjust and flags it as set. Always returns 0; an unparsable value is logged and ignored. */ + + int oa, r; + + if (isempty(rvalue)) { /* an empty assignment resets the setting to "unset" */ + arg_default_oom_score_adjust_set = false; + return 0; + } + + r = parse_oom_score_adjust(rvalue, &oa); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to parse the OOM score adjust value '%s', ignoring: %m", rvalue); + return 0; + } + + 
arg_default_oom_score_adjust = oa; + arg_default_oom_score_adjust_set = true; + + return 0; +} + static int parse_config_file(void) { const ConfigTableItem items[] = { { "Manager", "LogLevel", config_parse_level2, 0, NULL }, @@ -667,7 +701,7 @@ static int parse_config_file(void) { { "Manager", "DefaultStandardError", config_parse_output_restricted, 0, &arg_default_std_error }, { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec }, { "Manager", "DefaultTimeoutStopSec", config_parse_sec, 0, &arg_default_timeout_stop_usec }, - { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL }, + { "Manager", "DefaultTimeoutAbortSec", config_parse_default_timeout_abort, 0, NULL }, { "Manager", "DefaultRestartSec", config_parse_sec, 0, &arg_default_restart_usec }, { "Manager", "DefaultStartLimitInterval", config_parse_sec, 0, &arg_default_start_limit_interval }, /* obsolete alias */ { "Manager", "DefaultStartLimitIntervalSec", config_parse_sec, 0, &arg_default_start_limit_interval }, @@ -699,6 +733,7 @@ static int parse_config_file(void) { { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max }, { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action }, { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy }, + { "Manager", "DefaultOOMScoreAdjust", config_parse_oom_score_adjust, 0, NULL }, {} }; @@ -769,6 +804,8 @@ static void set_manager_defaults(Manager *m) { m->default_tasks_accounting = arg_default_tasks_accounting; m->default_tasks_max = arg_default_tasks_max; m->default_oom_policy = arg_default_oom_policy; + m->default_oom_score_adjust_set = arg_default_oom_score_adjust_set; + m->default_oom_score_adjust = arg_default_oom_score_adjust; (void) manager_set_default_rlimits(m, arg_default_rlimit); @@ -2426,6 +2463,35 @@ static void reset_arguments(void) { arg_random_seed = mfree(arg_random_seed);
arg_random_seed_size = 0; arg_clock_usec = 0; + + arg_default_oom_score_adjust_set = false; +} + +static void determine_default_oom_score_adjust(void) { + int r, a, b; + + /* Run our services at slightly higher OOM score than ourselves. But let's be conservative here, and + * do this only if we don't run as root (i.e. only if we are run in user mode, for an unprivileged + * user). */ + + if (arg_default_oom_score_adjust_set) + return; /* an explicitly configured value takes precedence */ + + if (getuid() == 0) + return; + + r = get_oom_score_adjust(&a); + if (r < 0) + return (void) log_warning_errno(r, "Failed to determine current OOM score adjustment value, ignoring: %m"); + + assert_cc(100 <= OOM_SCORE_ADJ_MAX); + b = a >= OOM_SCORE_ADJ_MAX - 100 ? OOM_SCORE_ADJ_MAX : a + 100; /* our own value plus 100, clamped at OOM_SCORE_ADJ_MAX */ + + if (a == b) + return; /* already at the maximum, nothing to raise */ + + arg_default_oom_score_adjust = b; + arg_default_oom_score_adjust_set = true; } static int parse_configuration(const struct rlimit *saved_rlimit_nofile, @@ -2459,6 +2525,9 @@ static int parse_configuration(const struct rlimit *saved_rlimit_nofile, if (arg_show_status == _SHOW_STATUS_INVALID) arg_show_status = SHOW_STATUS_YES; + /* Slightly raise the OOM score for our services if we are running for unprivileged users.
*/ + determine_default_oom_score_adjust(); + /* Push variables into the manager environment block */ setenv_manager_environment(); diff --git a/src/core/manager.h b/src/core/manager.h index 67957fdfbee..97c12ce48f1 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -370,6 +370,8 @@ struct Manager { usec_t default_timer_accuracy_usec; OOMPolicy default_oom_policy; + int default_oom_score_adjust; + bool default_oom_score_adjust_set; int original_log_level; LogTarget original_log_target; diff --git a/src/core/unit.c b/src/core/unit.c index 4fd499a4f1d..304f67dbf11 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -187,6 +187,11 @@ static void unit_init(Unit *u) { if (ec) { exec_context_init(ec); + if (u->manager->default_oom_score_adjust_set) { + ec->oom_score_adjust = u->manager->default_oom_score_adjust; + ec->oom_score_adjust_set = true; + } + if (MANAGER_IS_SYSTEM(u->manager)) ec->keyring_mode = EXEC_KEYRING_SHARED; else { diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c index 8c76392ae96..bee39d567b3 100644 --- a/src/test/test-process-util.c +++ b/src/test/test-process-util.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1-or-later */ #include +#include #include #include #include @@ -874,6 +875,24 @@ static void test_get_process_ppid(void) { } } +static void test_set_oom_score_adjust(void) { /* Checks that set_oom_score_adjust() and get_oom_score_adjust() agree with each other, and puts the initial value back at the end. */ + int a, b, r; + + assert_se(get_oom_score_adjust(&a) >= 0); /* remember the initial value so it can be restored below */ + + r = set_oom_score_adjust(OOM_SCORE_ADJ_MIN); + assert_se(r >= 0 || ERRNO_IS_PRIVILEGE(r)); /* a privilege error is tolerated here */ + + if (r >= 0) { + assert_se(get_oom_score_adjust(&b) >= 0); + assert_se(b == OOM_SCORE_ADJ_MIN); + } + + assert_se(set_oom_score_adjust(a) >= 0); + assert_se(get_oom_score_adjust(&b) >= 0); + assert_se(b == a); +} + int main(int argc, char *argv[]) { log_show_color(true); test_setup_logging(LOG_INFO); @@ -904,6 +923,7 @@ int main(int argc, char *argv[]) { test_ioprio_class_from_to_string(); test_setpriority_closest(); test_get_process_ppid(); + test_set_oom_score_adjust(); return
0; } diff --git a/units/user@.service.in b/units/user@.service.in index fec9afe1305..85fc3c907e2 100644 --- a/units/user@.service.in +++ b/units/user@.service.in @@ -25,3 +25,4 @@ Delegate=pids memory TasksMax=infinity TimeoutStopSec=120s KeyringMode=inherit +OOMScoreAdjust=100