From 78f93209fc7f61f15b12d7a5f74d712bd020b249 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 16 Nov 2018 20:19:07 +0100 Subject: [PATCH] core: when Delegate=yes is set for a unit, run ExecStartPre= and friends in a subcgroup of the unit Otherwise we might conflict with the "no-processes-in-inner-cgroup" rule of cgroupsv2. Consider nspawn starting up and initializing its cgroup hierarchy with "supervisor/" and "payload/" as subcgroup, with itself moved into the former and the payload into the latter. Now, if an ExecStartPre= is run right after it cannot be placed in the main cgroup, because that is now in inner cgroup with populated children. Hence, let's run these helpers in another sub-cgroup .control/ below it. This is somewhat ugly since it weakens the clear separation of ownership, but given that this is an explicit contract, and double opt-in should be acceptable. Fixes: #10482 --- src/core/execute.c | 67 +++++++++++++++++++++++++++++++++++++++------- src/core/execute.h | 9 ++++--- src/core/service.c | 13 ++++----- 3 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/core/execute.c b/src/core/execute.c index cf3c0551674..4f0ffb835ae 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2762,6 +2762,37 @@ static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p static char *exec_command_line(char **argv); +static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) { + bool using_subcgroup; + char *p; + + assert(params); + assert(ret); + + if (!params->cgroup_path) + return -EINVAL; + + /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated + * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control + * processes started after the main unit's process in the unit's main cgroup because it is now an inner one, + * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process, + * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=, + * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre= + * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP + * flag, which is only passed for the former statements, not for the latter. */ + + using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL); + if (using_subcgroup) + p = strjoin(params->cgroup_path, "/.control"); + else + p = strdup(params->cgroup_path); + if (!p) + return -ENOMEM; + + *ret = p; + return using_subcgroup; +} + static int exec_child( Unit *unit, const ExecCommand *command, @@ -2994,10 +3025,18 @@ static int exec_child( } if (params->cgroup_path) { - r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL); + _cleanup_free_ char *p = NULL; + + r = exec_parameters_get_cgroup_path(params, &p); if (r < 0) { *exit_status = EXIT_CGROUP; - return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", params->cgroup_path); + return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m"); + } + + r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL); + if (r < 0) { + *exit_status = EXIT_CGROUP; + return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p); } } @@ -3569,6 +3608,7 @@ int exec_spawn(Unit *unit, pid_t *ret) { int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL; + _cleanup_free_ char *subcgroup_path = NULL; _cleanup_strv_free_ char **files_env = NULL; size_t n_storage_fds = 0, n_socket_fds = 0; _cleanup_free_ char *line = NULL; @@ -3621,6 +3661,17 @@ int exec_spawn(Unit *unit, LOG_UNIT_ID(unit), LOG_UNIT_INVOCATION_ID(unit)); + if (params->cgroup_path) { + r = exec_parameters_get_cgroup_path(params, &subcgroup_path); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m"); + if (r > 0) { /* We are using a child cgroup */ + r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path); + if (r < 0) + return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path); + } + } + pid = fork(); if (pid < 0) return log_unit_error_errno(unit, errno, "Failed to fork: %m"); @@ -3658,13 +3709,11 @@ int exec_spawn(Unit *unit, log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid); - /* We add the new process to the cgroup both in the child (so - * that we can be sure that no user code is ever executed - * outside of the cgroup) and in the parent (so that we can be - * sure that when we kill the cgroup the process will be - * killed too). */ - if (params->cgroup_path) - (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid); + /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever + * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the + * process will be killed too). */ + if (subcgroup_path) + (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid); exec_status_start(&command->exec_status, pid); diff --git a/src/core/execute.h b/src/core/execute.h index 11a9b45dccf..16124cf28c4 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -294,12 +294,13 @@ typedef enum ExecFlags { EXEC_CHOWN_DIRECTORIES = 1 << 5, /* chown() the runtime/state/cache/log directories to the user we run as, under all conditions */ EXEC_NSS_BYPASS_BUS = 1 << 6, /* Set the SYSTEMD_NSS_BYPASS_BUS environment variable, to disable nss-systemd for dbus */ EXEC_CGROUP_DELEGATE = 1 << 7, + EXEC_IS_CONTROL = 1 << 8, + EXEC_CONTROL_CGROUP = 1 << 9, /* Place the process not in the indicated cgroup but in a subcgroup '/.control', but only EXEC_CGROUP_DELEGATE and EXEC_IS_CONTROL is set, too */ /* The following are not used by execute.c, but by consumers internally */ - EXEC_PASS_FDS = 1 << 8, - EXEC_IS_CONTROL = 1 << 9, - EXEC_SETENV_RESULT = 1 << 10, - EXEC_SET_WATCHDOG = 1 << 11, + EXEC_PASS_FDS = 1 << 10, + EXEC_SETENV_RESULT = 1 << 11, + EXEC_SET_WATCHDOG = 1 << 12, } ExecFlags; /* Parameters for a specific invocation of a command. This structure is put together right before a command is diff --git a/src/core/service.c b/src/core/service.c index 7675663f8b6..79347269ec5 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1418,7 +1418,7 @@ static int service_spawn( assert(c); assert(_pid); - r = unit_prepare_exec(UNIT(s)); + r = unit_prepare_exec(UNIT(s)); /* This realizes the cgroup, among other things */ if (r < 0) return r; @@ -1777,7 +1777,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_APPLY_TTY_STDIN|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -1892,7 +1892,7 @@ static void service_enter_stop(Service *s, ServiceResult f) { r = service_spawn(s, s->control_command, s->timeout_stop_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_SETENV_RESULT|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -1970,7 +1970,7 @@ static void service_enter_start_post(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -2214,7 +2214,7 @@ static void service_enter_reload(Service *s) { r = service_spawn(s, s->control_command, s->timeout_start_usec, - EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL, + EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL|EXEC_CONTROL_CGROUP, &s->control_pid); if (r < 0) goto fail; @@ -2254,7 +2254,8 @@ static void service_run_next_control(Service *s) { timeout, EXEC_APPLY_SANDBOXING|EXEC_APPLY_CHROOT|EXEC_IS_CONTROL| (IN_SET(s->control_command_id, SERVICE_EXEC_START_PRE, SERVICE_EXEC_STOP_POST) ? EXEC_APPLY_TTY_STDIN : 0)| - (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0), + (IN_SET(s->control_command_id, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_SETENV_RESULT : 0)| + (IN_SET(s->control_command_id, SERVICE_EXEC_START_POST, SERVICE_EXEC_RELOAD, SERVICE_EXEC_STOP, SERVICE_EXEC_STOP_POST) ? EXEC_CONTROL_CGROUP : 0), &s->control_pid); if (r < 0) goto fail;