mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-01-13 13:17:43 +03:00
cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only
Currently we allowed delegation for alluntis with cgroup backing except for slices. Let's make this a bit more strict for now, and only allow this in service and scope units. Let's also add a generic accessor unit_cgroup_delegate() for checking whether a unit has delegation turned on that checks the new bool first. Also, when doing transient units, let's explcitly refuse turning on delegation for unit types that don#t support it. This is mostly cosmetical as we wouldn't act on the delegation request anyway, but certainly helpful for debugging.
This commit is contained in:
parent
b8e2400586
commit
1d9cc8768f
@ -569,7 +569,7 @@ int bpf_firewall_install(Unit *u) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");
|
||||
|
||||
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
|
||||
r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
|
||||
} else {
|
||||
@ -584,7 +584,7 @@ int bpf_firewall_install(Unit *u) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");
|
||||
|
||||
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
|
||||
r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
|
||||
} else {
|
||||
|
@ -1124,14 +1124,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
|
||||
*
|
||||
* Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
|
||||
|
||||
if (u->type == UNIT_SLICE)
|
||||
return 0;
|
||||
|
||||
c = unit_get_cgroup_context(u);
|
||||
if (!c)
|
||||
return 0;
|
||||
|
||||
if (!c->delegate)
|
||||
if (!unit_cgroup_delegate(u))
|
||||
return 0;
|
||||
|
||||
if (cg_all_unified() <= 0) {
|
||||
@ -1142,6 +1135,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert_se(c = unit_get_cgroup_context(u));
|
||||
return c->delegate_controllers;
|
||||
}
|
||||
|
||||
@ -1496,7 +1490,7 @@ static int unit_create_cgroup(
|
||||
u->cgroup_enabled_mask = enable_mask;
|
||||
u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
|
||||
|
||||
if (u->type != UNIT_SLICE && !c->delegate) {
|
||||
if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
|
||||
|
||||
/* Then, possibly move things over, but not if
|
||||
* subgroups may contain processes, which is the case
|
||||
@ -2563,6 +2557,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
|
||||
}
|
||||
}
|
||||
|
||||
bool unit_cgroup_delegate(Unit *u) {
|
||||
CGroupContext *c;
|
||||
|
||||
assert(u);
|
||||
|
||||
if (!UNIT_VTABLE(u)->can_delegate)
|
||||
return false;
|
||||
|
||||
c = unit_get_cgroup_context(u);
|
||||
if (!c)
|
||||
return false;
|
||||
|
||||
return c->delegate;
|
||||
}
|
||||
|
||||
void manager_invalidate_startup_units(Manager *m) {
|
||||
Iterator i;
|
||||
Unit *u;
|
||||
|
@ -219,3 +219,5 @@ void manager_invalidate_startup_units(Manager *m);
|
||||
|
||||
const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
|
||||
CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
|
||||
|
||||
bool unit_cgroup_delegate(Unit *u);
|
||||
|
@ -351,6 +351,9 @@ static int bus_cgroup_set_transient_property(
|
||||
if (streq(name, "Delegate")) {
|
||||
int b;
|
||||
|
||||
if (!UNIT_VTABLE(u)->can_delegate)
|
||||
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
|
||||
|
||||
r = sd_bus_message_read(message, "b", &b);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -367,6 +370,9 @@ static int bus_cgroup_set_transient_property(
|
||||
} else if (streq(name, "DelegateControllers")) {
|
||||
CGroupMask mask = 0;
|
||||
|
||||
if (!UNIT_VTABLE(u)->can_delegate)
|
||||
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
|
||||
|
||||
r = sd_bus_message_enter_container(message, 'a', "s");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -599,6 +599,7 @@ const UnitVTable scope_vtable = {
|
||||
.private_section = "Scope",
|
||||
|
||||
.can_transient = true,
|
||||
.can_delegate = true,
|
||||
|
||||
.init = scope_init,
|
||||
.load = scope_load,
|
||||
|
@ -3909,6 +3909,9 @@ const UnitVTable service_vtable = {
|
||||
"Install\0",
|
||||
.private_section = "Service",
|
||||
|
||||
.can_transient = true,
|
||||
.can_delegate = true,
|
||||
|
||||
.init = service_init,
|
||||
.done = service_done,
|
||||
.load = service_load,
|
||||
@ -3954,7 +3957,6 @@ const UnitVTable service_vtable = {
|
||||
|
||||
.get_timeout = service_get_timeout,
|
||||
.needs_console = service_needs_console,
|
||||
.can_transient = true,
|
||||
|
||||
.status_message_formats = {
|
||||
.starting_stopping = {
|
||||
|
@ -4541,22 +4541,15 @@ int unit_kill_context(
|
||||
|
||||
} else if (r > 0) {
|
||||
|
||||
/* FIXME: For now, on the legacy hierarchy, we
|
||||
* will not wait for the cgroup members to die
|
||||
* if we are running in a container or if this
|
||||
* is a delegation unit, simply because cgroup
|
||||
* notification is unreliable in these
|
||||
* cases. It doesn't work at all in
|
||||
* containers, and outside of containers it
|
||||
* can be confused easily by left-over
|
||||
* directories in the cgroup — which however
|
||||
* should not exist in non-delegated units. On
|
||||
* the unified hierarchy that's different,
|
||||
* there we get proper events. Hence rely on
|
||||
* them. */
|
||||
/* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if
|
||||
* we are running in a container or if this is a delegation unit, simply because cgroup
|
||||
* notification is unreliable in these cases. It doesn't work at all in containers, and outside
|
||||
* of containers it can be confused easily by left-over directories in the cgroup — which
|
||||
* however should not exist in non-delegated units. On the unified hierarchy that's different,
|
||||
* there we get proper events. Hence rely on them. */
|
||||
|
||||
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
|
||||
(detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate)))
|
||||
(detect_container() == 0 && !unit_cgroup_delegate(u)))
|
||||
wait_for_exit = true;
|
||||
|
||||
if (send_sighup) {
|
||||
@ -5007,7 +5000,7 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
|
||||
assert(p);
|
||||
|
||||
p->cgroup_path = u->cgroup_path;
|
||||
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate));
|
||||
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
|
||||
}
|
||||
|
||||
int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
|
||||
|
@ -568,6 +568,9 @@ struct UnitVTable {
|
||||
/* True if transient units of this type are OK */
|
||||
bool can_transient:1;
|
||||
|
||||
/* True if cgroup delegation is permissible */
|
||||
bool can_delegate:1;
|
||||
|
||||
/* True if queued jobs of this type should be GC'ed if no other job needs them anymore */
|
||||
bool gc_jobs:1;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user