mirror of
https://github.com/systemd/systemd.git
synced 2025-01-03 05:18:09 +03:00
core: introduce new Delegate=yes/no property controlling creation of cgroup subhierarchies
For privileged units this resource control property ensures that the processes have all controllers systemd manages enabled. For unprivileged services (those with User= set) this ensures that access rights to the service cgroup are granted to the user in question, to create further subgroups. Note that this only applies to the name=systemd hierarchy though, as access to other controllers is not safe for unprivileged processes. Delegate=yes should be set for container scopes where a systemd instance inside the container shall manage the hierarchies below its own cgroup and have access to all controllers. Delegate=yes should also be set for user@.service, so that systemd --user can run, controlling its own cgroup tree. This commit changes machined, systemd-nspawn@.service and user@.service to set this boolean, in order to ensure that container management will just work, and the user systemd instance can run fine.
This commit is contained in:
parent
c962cb68d5
commit
a931ad47a8
@ -394,6 +394,20 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><varname>Delegate=</varname></term>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>Turns on delegation of further resource control
|
||||||
|
partitioning to processes of the unit. For unprivileged
|
||||||
|
services (i.e. those using the <varname>User=</varname>
|
||||||
|
setting) this allows processes to create a subhierarchy
|
||||||
|
beneath its control group path. For privileged services and
|
||||||
|
scopes this ensures the processes will have all control
|
||||||
|
group controllers enabled.</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
@ -105,7 +105,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
|||||||
"%sBlockIOWeight=%lu\n"
|
"%sBlockIOWeight=%lu\n"
|
||||||
"%sStartupBlockIOWeight=%lu\n"
|
"%sStartupBlockIOWeight=%lu\n"
|
||||||
"%sMemoryLimit=%" PRIu64 "\n"
|
"%sMemoryLimit=%" PRIu64 "\n"
|
||||||
"%sDevicePolicy=%s\n",
|
"%sDevicePolicy=%s\n"
|
||||||
|
"%sDelegate=%s\n",
|
||||||
prefix, yes_no(c->cpu_accounting),
|
prefix, yes_no(c->cpu_accounting),
|
||||||
prefix, yes_no(c->blockio_accounting),
|
prefix, yes_no(c->blockio_accounting),
|
||||||
prefix, yes_no(c->memory_accounting),
|
prefix, yes_no(c->memory_accounting),
|
||||||
@ -115,7 +116,8 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
|
|||||||
prefix, c->blockio_weight,
|
prefix, c->blockio_weight,
|
||||||
prefix, c->startup_blockio_weight,
|
prefix, c->startup_blockio_weight,
|
||||||
prefix, c->memory_limit,
|
prefix, c->memory_limit,
|
||||||
prefix, cgroup_device_policy_to_string(c->device_policy));
|
prefix, cgroup_device_policy_to_string(c->device_policy),
|
||||||
|
prefix, yes_no(c->delegate));
|
||||||
|
|
||||||
LIST_FOREACH(device_allow, a, c->device_allow)
|
LIST_FOREACH(device_allow, a, c->device_allow)
|
||||||
fprintf(f,
|
fprintf(f,
|
||||||
@ -461,7 +463,8 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
|
|||||||
c->memory_limit != (uint64_t) -1)
|
c->memory_limit != (uint64_t) -1)
|
||||||
mask |= CGROUP_MEMORY;
|
mask |= CGROUP_MEMORY;
|
||||||
|
|
||||||
if (c->device_allow || c->device_policy != CGROUP_AUTO)
|
if (c->device_allow ||
|
||||||
|
c->device_policy != CGROUP_AUTO)
|
||||||
mask |= CGROUP_DEVICE;
|
mask |= CGROUP_DEVICE;
|
||||||
|
|
||||||
return mask;
|
return mask;
|
||||||
@ -474,6 +477,19 @@ CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
|
|||||||
if (!c)
|
if (!c)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/* If delegation is turned on, then turn on all cgroups,
|
||||||
|
* unless the process we fork into it is known to drop
|
||||||
|
* privileges anyway, and shouldn't get access to the
|
||||||
|
* controllers anyway. */
|
||||||
|
|
||||||
|
if (c->delegate) {
|
||||||
|
ExecContext *e;
|
||||||
|
|
||||||
|
e = unit_get_exec_context(u);
|
||||||
|
if (!e || exec_context_maintains_privileges(e))
|
||||||
|
return _CGROUP_CONTROLLER_MASK_ALL;
|
||||||
|
}
|
||||||
|
|
||||||
return cgroup_context_get_mask(c);
|
return cgroup_context_get_mask(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,6 +83,8 @@ struct CGroupContext {
|
|||||||
|
|
||||||
CGroupDevicePolicy device_policy;
|
CGroupDevicePolicy device_policy;
|
||||||
LIST_HEAD(CGroupDeviceAllow, device_allow);
|
LIST_HEAD(CGroupDeviceAllow, device_allow);
|
||||||
|
|
||||||
|
bool delegate;
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "unit.h"
|
#include "unit.h"
|
||||||
|
@ -153,6 +153,7 @@ static int property_get_ulong_as_u64(
|
|||||||
|
|
||||||
const sd_bus_vtable bus_cgroup_vtable[] = {
|
const sd_bus_vtable bus_cgroup_vtable[] = {
|
||||||
SD_BUS_VTABLE_START(0),
|
SD_BUS_VTABLE_START(0),
|
||||||
|
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
|
||||||
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
|
SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0),
|
||||||
SD_BUS_PROPERTY("CPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, cpu_shares), 0),
|
SD_BUS_PROPERTY("CPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, cpu_shares), 0),
|
||||||
SD_BUS_PROPERTY("StartupCPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_cpu_shares), 0),
|
SD_BUS_PROPERTY("StartupCPUShares", "t", property_get_ulong_as_u64, offsetof(CGroupContext, startup_cpu_shares), 0),
|
||||||
@ -170,6 +171,39 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
|
|||||||
SD_BUS_VTABLE_END
|
SD_BUS_VTABLE_END
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int bus_cgroup_set_transient_property(
|
||||||
|
Unit *u,
|
||||||
|
CGroupContext *c,
|
||||||
|
const char *name,
|
||||||
|
sd_bus_message *message,
|
||||||
|
UnitSetPropertiesMode mode,
|
||||||
|
sd_bus_error *error) {
|
||||||
|
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(u);
|
||||||
|
assert(c);
|
||||||
|
assert(name);
|
||||||
|
assert(message);
|
||||||
|
|
||||||
|
if (streq(name, "Delegate")) {
|
||||||
|
int b;
|
||||||
|
|
||||||
|
r = sd_bus_message_read(message, "b", &b);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (mode != UNIT_CHECK) {
|
||||||
|
c->delegate = b;
|
||||||
|
unit_write_drop_in_private(u, mode, name, b ? "Delegate=yes" : "Delegate=no");
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int bus_cgroup_set_property(
|
int bus_cgroup_set_property(
|
||||||
Unit *u,
|
Unit *u,
|
||||||
CGroupContext *c,
|
CGroupContext *c,
|
||||||
@ -632,6 +666,14 @@ int bus_cgroup_set_property(
|
|||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (u->transient && u->load_state == UNIT_STUB) {
|
||||||
|
r = bus_cgroup_set_transient_property(u, c, name, message, mode, error);
|
||||||
|
if (r != 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1444,8 +1444,10 @@ static int exec_child(ExecCommand *command,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_PAM
|
/* If delegation is enabled we'll pass ownership of the cgroup
|
||||||
if (params->cgroup_path && context->user && context->pam_name) {
|
* (but only in systemd's own controller hierarchy!) to the
|
||||||
|
* user of the new process. */
|
||||||
|
if (params->cgroup_path && context->user && params->cgroup_delegate) {
|
||||||
err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
|
err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
*error = EXIT_CGROUP;
|
*error = EXIT_CGROUP;
|
||||||
@ -1459,7 +1461,6 @@ static int exec_child(ExecCommand *command,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
|
if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
|
||||||
char **rt;
|
char **rt;
|
||||||
@ -2402,6 +2403,21 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
|
|||||||
prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
|
prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool exec_context_maintains_privileges(ExecContext *c) {
|
||||||
|
assert(c);
|
||||||
|
|
||||||
|
/* Returns true if the process forked off would run under
|
||||||
|
* an unchanged UID or as root. */
|
||||||
|
|
||||||
|
if (!c->user)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (streq(c->user, "root") || streq(c->user, "0"))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void exec_status_start(ExecStatus *s, pid_t pid) {
|
void exec_status_start(ExecStatus *s, pid_t pid) {
|
||||||
assert(s);
|
assert(s);
|
||||||
|
|
||||||
|
@ -207,6 +207,7 @@ struct ExecParameters {
|
|||||||
bool selinux_context_net;
|
bool selinux_context_net;
|
||||||
CGroupControllerMask cgroup_supported;
|
CGroupControllerMask cgroup_supported;
|
||||||
const char *cgroup_path;
|
const char *cgroup_path;
|
||||||
|
bool cgroup_delegate;
|
||||||
const char *runtime_prefix;
|
const char *runtime_prefix;
|
||||||
const char *unit_id;
|
const char *unit_id;
|
||||||
usec_t watchdog_usec;
|
usec_t watchdog_usec;
|
||||||
@ -244,6 +245,7 @@ int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_r
|
|||||||
int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l);
|
int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l);
|
||||||
|
|
||||||
bool exec_context_may_touch_console(ExecContext *c);
|
bool exec_context_may_touch_console(ExecContext *c);
|
||||||
|
bool exec_context_maintains_privileges(ExecContext *c);
|
||||||
|
|
||||||
void exec_status_start(ExecStatus *s, pid_t pid);
|
void exec_status_start(ExecStatus *s, pid_t pid);
|
||||||
void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status);
|
void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status);
|
||||||
|
@ -119,7 +119,8 @@ $1.BlockIOWeight, config_parse_blockio_weight, 0,
|
|||||||
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
|
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)
|
||||||
$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
|
$1.BlockIODeviceWeight, config_parse_blockio_device_weight, 0, offsetof($1, cgroup_context)
|
||||||
$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
|
$1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
|
||||||
$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)'
|
$1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context)
|
||||||
|
$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)'
|
||||||
)m4_dnl
|
)m4_dnl
|
||||||
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
|
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)
|
||||||
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
|
Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation)
|
||||||
|
@ -715,6 +715,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
|
|||||||
exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
|
exec_params.confirm_spawn = UNIT(m)->manager->confirm_spawn;
|
||||||
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
|
exec_params.cgroup_supported = UNIT(m)->manager->cgroup_supported;
|
||||||
exec_params.cgroup_path = UNIT(m)->cgroup_path;
|
exec_params.cgroup_path = UNIT(m)->cgroup_path;
|
||||||
|
exec_params.cgroup_delegate = m->cgroup_context.delegate;
|
||||||
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(m)->manager);
|
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(m)->manager);
|
||||||
exec_params.unit_id = UNIT(m)->id;
|
exec_params.unit_id = UNIT(m)->id;
|
||||||
|
|
||||||
|
@ -1000,6 +1000,7 @@ static int service_spawn(
|
|||||||
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
||||||
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
||||||
exec_params.cgroup_path = path;
|
exec_params.cgroup_path = path;
|
||||||
|
exec_params.cgroup_delegate = s->cgroup_context.delegate;
|
||||||
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
||||||
exec_params.unit_id = UNIT(s)->id;
|
exec_params.unit_id = UNIT(s)->id;
|
||||||
exec_params.watchdog_usec = s->watchdog_usec;
|
exec_params.watchdog_usec = s->watchdog_usec;
|
||||||
|
@ -1414,6 +1414,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
|
|||||||
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
||||||
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
||||||
exec_params.cgroup_path = UNIT(s)->cgroup_path;
|
exec_params.cgroup_path = UNIT(s)->cgroup_path;
|
||||||
|
exec_params.cgroup_delegate = s->cgroup_context.delegate;
|
||||||
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
||||||
exec_params.unit_id = UNIT(s)->id;
|
exec_params.unit_id = UNIT(s)->id;
|
||||||
|
|
||||||
|
@ -627,6 +627,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
|
|||||||
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
exec_params.confirm_spawn = UNIT(s)->manager->confirm_spawn;
|
||||||
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
exec_params.cgroup_supported = UNIT(s)->manager->cgroup_supported;
|
||||||
exec_params.cgroup_path = UNIT(s)->cgroup_path;
|
exec_params.cgroup_path = UNIT(s)->cgroup_path;
|
||||||
|
exec_params.cgroup_delegate = s->cgroup_context.delegate;
|
||||||
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
exec_params.runtime_prefix = manager_get_runtime_prefix(UNIT(s)->manager);
|
||||||
exec_params.unit_id = UNIT(s)->id;
|
exec_params.unit_id = UNIT(s)->id;
|
||||||
|
|
||||||
|
@ -622,6 +622,10 @@ int manager_start_scope(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
r = sd_bus_message_append(m, "(sv)", "Delegate", "b", 1);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
if (more_properties) {
|
if (more_properties) {
|
||||||
r = sd_bus_message_copy(m, more_properties, true);
|
r = sd_bus_message_copy(m, more_properties, true);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
|
@ -34,7 +34,8 @@ typedef enum CGroupControllerMask {
|
|||||||
CGROUP_CPUACCT = 2,
|
CGROUP_CPUACCT = 2,
|
||||||
CGROUP_BLKIO = 4,
|
CGROUP_BLKIO = 4,
|
||||||
CGROUP_MEMORY = 8,
|
CGROUP_MEMORY = 8,
|
||||||
CGROUP_DEVICE = 16
|
CGROUP_DEVICE = 16,
|
||||||
|
_CGROUP_CONTROLLER_MASK_ALL = 31
|
||||||
} CGroupControllerMask;
|
} CGroupControllerMask;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -15,6 +15,7 @@ KillMode=mixed
|
|||||||
Type=notify
|
Type=notify
|
||||||
RestartForceExitStatus=133
|
RestartForceExitStatus=133
|
||||||
SuccessExitStatus=133
|
SuccessExitStatus=133
|
||||||
|
Delegate=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
@ -16,3 +16,4 @@ Type=notify
|
|||||||
ExecStart=-@rootlibexecdir@/systemd --user
|
ExecStart=-@rootlibexecdir@/systemd --user
|
||||||
Slice=user-%i.slice
|
Slice=user-%i.slice
|
||||||
KillMode=mixed
|
KillMode=mixed
|
||||||
|
Delegate=yes
|
||||||
|
Loading…
Reference in New Issue
Block a user