diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml
index 968b328dd99..218946d4eee 100644
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@@ -394,6 +394,20 @@ along with systemd; If not, see .
+
+ Delegate=
+
+
+ Turns on delegation of further resource control
+ partitioning to processes of the unit. For unprivileged
+ services (i.e. those using the User=
+ setting) this allows processes to create a subhierarchy
+ beneath its control group path. For privileged services and
+ scopes this ensures the processes will have all control
+ group controllers enabled.
+
+
+
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index e604c3cbc69..0951a099643 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -105,7 +105,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOWeight=%lu\n"
"%sStartupBlockIOWeight=%lu\n"
"%sMemoryLimit=%" PRIu64 "\n"
- "%sDevicePolicy=%s\n",
+ "%sDevicePolicy=%s\n"
+ "%sDelegate=%s\n",
prefix, yes_no(c->cpu_accounting),
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
@@ -115,7 +116,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, c->blockio_weight,
prefix, c->startup_blockio_weight,
prefix, c->memory_limit,
- prefix, cgroup_device_policy_to_string(c->device_policy));
+ prefix, cgroup_device_policy_to_string(c->device_policy),
+ prefix, yes_no(c->delegate));
LIST_FOREACH(device_allow, a, c->device_allow)
fprintf(f,
@@ -461,7 +463,8 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
c->memory_limit != (uint64_t) -1)
mask |= CGROUP_MEMORY;
- if (c->device_allow || c->device_policy != CGROUP_AUTO)
+ if (c->device_allow ||
+ c->device_policy != CGROUP_AUTO)
mask |= CGROUP_DEVICE;
return mask;
@@ -474,6 +477,19 @@ CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
if (!c)
return 0;
+ /* If delegation is turned on, then turn on all cgroup
+ * controllers, unless the process we fork into the cgroup
+ * is known to drop privileges and hence shouldn't get
+ * access to the controllers anyway. */
+
+ if (c->delegate) {
+ ExecContext *e;
+
+ e = unit_get_exec_context(u);
+ if (!e || exec_context_maintains_privileges(e))
+ return _CGROUP_CONTROLLER_MASK_ALL;
+ }
+
return cgroup_context_get_mask(c);
}
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index d299872b1f4..3c43885bf28 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -83,6 +83,8 @@ struct CGroupContext {
CGroupDevicePolicy device_policy;
LIST_HEAD(CGroupDeviceAllow, device_allow);
+
+ bool delegate;
};
#include "unit.h"
diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c
index 900566c29b0..db998345eb3 100644
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@@ -153,6 +153,7 @@ static int property_get_ulong_as_u64(
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
+ SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
SD_BUS_PROPERTY("CPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, cpu_shares), 0),
SD_BUS_PROPERTY("StartupCPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_cpu_shares), 0),
@@ -170,6 +171,39 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_END
};
+static int bus_cgroup_set_transient_property(
+ Unit *u,
+ CGroupContext *c,
+ const char *name,
+ sd_bus_message *message,
+ UnitSetPropertiesMode mode,
+ sd_bus_error *error) {
+ /* Sets a cgroup property arriving over the bus; only "Delegate" is recognized here, and 'error' is not used. */
+ int r;
+
+ assert(u);
+ assert(c);
+ assert(name);
+ assert(message);
+
+ if (streq(name, "Delegate")) {
+ int b;
+
+ r = sd_bus_message_read(message, "b", &b);
+ if (r < 0)
+ return r; /* failed to read the boolean from the message */
+
+ if (mode != UNIT_CHECK) { /* in UNIT_CHECK mode the value is only parsed, nothing is applied */
+ c->delegate = b;
+ unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes" : "Delegate=no");
+ }
+
+ return 1; /* property was recognized and consumed */
+ }
+
+ return 0; /* not a property handled by this function */
+}
+
int bus_cgroup_set_property(
Unit *u,
CGroupContext *c,
@@ -632,6 +666,14 @@ int bus_cgroup_set_property(
}
return 1;
+
+ }
+
+ if (u->transient && u->load_state == UNIT_STUB) {
+ r = bus_cgroup_set_transient_property(u, c, name, message, mode, error);
+ if (r != 0)
+ return r;
+
}
return 0;
diff --git a/src/core/execute.c b/src/core/execute.c
index c41aec222de..5cfd4a1f975 100644
--- a/src/core/execute.c
+++ b/src/core/execute.c
@@ -1444,8 +1444,10 @@ static int exec_child(ExecCommand *command,
}
#endif
-#ifdef HAVE_PAM
- if (params->cgroup_path && context->user && context->pam_name) {
+ /* If delegation is enabled we'll pass ownership of the cgroup
+ * (but only in systemd's own controller hierarchy!) to the
+ * user of the new process. */
+ if (params->cgroup_path && context->user && params->cgroup_delegate) {
err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
if (err < 0) {
*error = EXIT_CGROUP;
@@ -1459,7 +1461,6 @@ static int exec_child(ExecCommand *command,
return err;
}
}
-#endif
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
char **rt;
@@ -2402,6 +2403,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
}
+bool exec_context_maintains_privileges(ExecContext *c) {
+ assert(c);
+
+ /* Returns true if the process forked off would run under
+ * an unchanged UID or as root. */
+
+ if (!c->user) /* no user configured: the UID stays unchanged */
+ return true;
+
+ if (streq(c->user, "root") || streq(c->user, "0")) /* root, given by name or as the string "0" */
+ return true;
+
+ return false; /* any other user string is treated as dropping privileges */
+}
+
void exec_status_start(ExecStatus *s, pid_t pid) {
assert(s);
diff --git a/src/core/execute.h b/src/core/execute.h
index c45dde53a64..b16a24d0c6a 100644
--- a/src/core/execute.h
+++ b/src/core/execute.h
@@ -207,6 +207,7 @@ struct ExecParameters {
bool selinux_context_net;
CGroupControllerMask cgroup_supported;
const char *cgroup_path;
+ bool cgroup_delegate;
const char *runtime_prefix;
const char *unit_id;
usec_t watchdog_usec;
@@ -244,6 +245,7 @@ int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_r
int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l);
bool exec_context_may_touch_console(ExecContext *c);
+bool exec_context_maintains_privileges(ExecContext *c);
void exec_status_start(ExecStatus *s, pid_t pid);
void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status);
diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4
index ca0139479b1..5158a9f158e 100644
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@@ -119,7 +119,8 @@ $1.BlockIOWeight, config_parse_blockio_weight, 0,
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
-$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)'
+$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
+$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)'
)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
diff --git a/src/core/mount.c b/src/core/mount.c
index 01243c381ab..8b787f66b09 100644
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -715,6 +715,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(m)->cgroup_path;
+ exec_params.cgroup_delegate = m->cgroup_context.delegate;
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(m)->manager);
exec_params.unit_id = UNIT(m)->id;
diff --git a/src/core/service.c b/src/core/service.c
index f27e63eb9a1..6a27e8f67d4 100644
--- a/src/core/service.c
+++ b/src/core/service.c
@@ -1000,6 +1000,7 @@ static int service_spawn(
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = path;
+ exec_params.cgroup_delegate = s->cgroup_context.delegate;
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
exec_params.unit_id = UNIT(s)->id;
exec_params.watchdog_usec = s->watchdog_usec;
diff --git a/src/core/socket.c b/src/core/socket.c
index 6ba8338d8b2..39652ef5603 100644
--- a/src/core/socket.c
+++ b/src/core/socket.c
@@ -1414,6 +1414,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
+ exec_params.cgroup_delegate = s->cgroup_context.delegate;
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
exec_params.unit_id = UNIT(s)->id;
diff --git a/src/core/swap.c b/src/core/swap.c
index 1add722bf13..0a1cc809368 100644
--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -627,6 +627,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
exec_params.cgroup_path = UNIT(s)->cgroup_path;
+ exec_params.cgroup_delegate = s->cgroup_context.delegate;
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
exec_params.unit_id = UNIT(s)->id;
diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c
index 3c7d4be8d28..7f8c631ef61 100644
--- a/src/machine/machined-dbus.c
+++ b/src/machine/machined-dbus.c
@@ -622,6 +622,10 @@ int manager_start_scope(
if (r < 0)
return r;
+ r = sd_bus_message_append(m, "(sv)", "Delegate", "b", 1);
+ if (r < 0)
+ return r;
+
if (more_properties) {
r = sd_bus_message_copy(m, more_properties, true);
if (r < 0)
diff --git a/src/shared/cgroup-util.h b/src/shared/cgroup-util.h
index aca4e44c465..a65f515b455 100644
--- a/src/shared/cgroup-util.h
+++ b/src/shared/cgroup-util.h
@@ -34,7 +34,8 @@ typedef enum CGroupControllerMask {
CGROUP_CPUACCT = 2,
CGROUP_BLKIO = 4,
CGROUP_MEMORY = 8,
- CGROUP_DEVICE = 16
+ CGROUP_DEVICE = 16,
+ _CGROUP_CONTROLLER_MASK_ALL = 31
} CGroupControllerMask;
/*
diff --git a/units/systemd-nspawn@.service.in b/units/systemd-nspawn@.service.in
index 574d0deafa2..dec2ce7df79 100644
--- a/units/systemd-nspawn@.service.in
+++ b/units/systemd-nspawn@.service.in
@@ -15,6 +15,7 @@ KillMode=mixed
Type=notify
RestartForceExitStatus=133
SuccessExitStatus=133
+Delegate=yes
[Install]
WantedBy=multi-user.target
diff --git a/units/user@.service.in b/units/user@.service.in
index 8091ce1a0ba..1e21d51aaef 100644
--- a/units/user@.service.in
+++ b/units/user@.service.in
@@ -16,3 +16,4 @@ Type=notify
ExecStart=-@rootlibexecdir@/systemd --user
Slice=user-%i.slice
KillMode=mixed
+Delegate=yes