cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only

Currently we allowed delegation for alluntis with cgroup backing except for slices. Let's make this a bit more strict for now, and only allow this in service and scope units. Let's also add a generic accessor unit_cgroup_delegate() for checking whether a unit has delegation turned on that checks the new bool first. Also, when doing transient units, let's explcitly refuse turning on delegation for unit types that don#t support it. This is mostly cosmetical as we wouldn't act on the delegation request anyway, but certainly helpful for debugging.
2024-10-31 16:21:26 +03:00 · 2018-02-06 11:57:35 +01:00 · 2018-02-06 11:57:35 +01:00 · 1d9cc8768f
commit 1d9cc8768f
parent b8e2400586
8 changed files with 43 additions and 27 deletions
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@ -569,7 +569,7 @@ int bpf_firewall_install(Unit *u) {
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

-                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
        } else {
@ -584,7 +584,7 @@ int bpf_firewall_install(Unit *u) {
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

-                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0);
+                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, unit_cgroup_delegate(u) ? BPF_F_ALLOW_OVERRIDE : 0);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
        } else {
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@ -1124,14 +1124,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
         *
         * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */

-        if (u->type == UNIT_SLICE)
-                return 0;
-
-        c = unit_get_cgroup_context(u);
-        if (!c)
-                return 0;
-
-        if (!c->delegate)
+        if (!unit_cgroup_delegate(u))
                return 0;

        if (cg_all_unified() <= 0) {
@ -1142,6 +1135,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
                        return 0;
        }

+        assert_se(c = unit_get_cgroup_context(u));
        return c->delegate_controllers;
 }

@ -1496,7 +1490,7 @@ static int unit_create_cgroup(
        u->cgroup_enabled_mask = enable_mask;
        u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;

-        if (u->type != UNIT_SLICE && !c->delegate) {
+        if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {

                /* Then, possibly move things over, but not if
                 * subgroups may contain processes, which is the case
@ -2563,6 +2557,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
        }
 }

+bool unit_cgroup_delegate(Unit *u) {
+        CGroupContext *c;
+
+        assert(u);
+
+        if (!UNIT_VTABLE(u)->can_delegate)
+                return false;
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return false;
+
+        return c->delegate;
+}
+
 void manager_invalidate_startup_units(Manager *m) {
        Iterator i;
        Unit *u;
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@ -219,3 +219,5 @@ void manager_invalidate_startup_units(Manager *m);

 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
 CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
+
+bool unit_cgroup_delegate(Unit *u);
--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@ -351,6 +351,9 @@ static int bus_cgroup_set_transient_property(
        if (streq(name, "Delegate")) {
                int b;

+                if (!UNIT_VTABLE(u)->can_delegate)
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
                r = sd_bus_message_read(message, "b", &b);
                if (r < 0)
                        return r;
@ -367,6 +370,9 @@ static int bus_cgroup_set_transient_property(
        } else if (streq(name, "DelegateControllers")) {
                CGroupMask mask = 0;

+                if (!UNIT_VTABLE(u)->can_delegate)
+                        return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Delegation not available for unit type");
+
                r = sd_bus_message_enter_container(message, 'a', "s");
                if (r < 0)
                        return r;
--- a/src/core/scope.c
+++ b/src/core/scope.c
@ -599,6 +599,7 @@ const UnitVTable scope_vtable = {
        .private_section = "Scope",

        .can_transient = true,
+        .can_delegate = true,

        .init = scope_init,
        .load = scope_load,
--- a/src/core/service.c
+++ b/src/core/service.c
@ -3909,6 +3909,9 @@ const UnitVTable service_vtable = {
                "Install\0",
        .private_section = "Service",

+        .can_transient = true,
+        .can_delegate = true,
+
        .init = service_init,
        .done = service_done,
        .load = service_load,
@ -3954,7 +3957,6 @@ const UnitVTable service_vtable = {

        .get_timeout = service_get_timeout,
        .needs_console = service_needs_console,
-        .can_transient = true,

        .status_message_formats = {
                .starting_stopping = {
--- a/src/core/unit.c
+++ b/src/core/unit.c
@ -4541,22 +4541,15 @@ int unit_kill_context(

                } else if (r > 0) {

-                        /* FIXME: For now, on the legacy hierarchy, we
-                         * will not wait for the cgroup members to die
-                         * if we are running in a container or if this
-                         * is a delegation unit, simply because cgroup
-                         * notification is unreliable in these
-                         * cases. It doesn't work at all in
-                         * containers, and outside of containers it
-                         * can be confused easily by left-over
-                         * directories in the cgroup — which however
-                         * should not exist in non-delegated units. On
-                         * the unified hierarchy that's different,
-                         * there we get proper events. Hence rely on
-                         * them. */
+                        /* FIXME: For now, on the legacy hierarchy, we will not wait for the cgroup members to die if
+                         * we are running in a container or if this is a delegation unit, simply because cgroup
+                         * notification is unreliable in these cases. It doesn't work at all in containers, and outside
+                         * of containers it can be confused easily by left-over directories in the cgroup — which
+                         * however should not exist in non-delegated units. On the unified hierarchy that's different,
+                         * there we get proper events. Hence rely on them. */

                        if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0 ||
-                            (detect_container() == 0 && !UNIT_CGROUP_BOOL(u, delegate)))
+                            (detect_container() == 0 && !unit_cgroup_delegate(u)))
                                wait_for_exit = true;

                        if (send_sighup) {
@ -5007,7 +5000,7 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
        assert(p);

        p->cgroup_path = u->cgroup_path;
-        SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, UNIT_CGROUP_BOOL(u, delegate));
+        SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 }

 int unit_fork_helper_process(Unit *u, const char *name, pid_t *ret) {
--- a/src/core/unit.h
+++ b/src/core/unit.h
@ -568,6 +568,9 @@ struct UnitVTable {
        /* True if transient units of this type are OK */
        bool can_transient:1;

+        /* True if cgroup delegation is permissible */
+        bool can_delegate:1;
+
        /* True if queued jobs of this type should be GC'ed if no other job needs them anymore */
        bool gc_jobs:1;
 };