diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 2d303914893..d38452cfcbb 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2595,22 +2595,45 @@ int cg_unified_flush(void) { return cg_unified_update(); } -int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { +int cg_enable_everywhere( + CGroupMask supported, + CGroupMask mask, + const char *p, + CGroupMask *ret_result_mask) { + _cleanup_fclose_ FILE *f = NULL; _cleanup_free_ char *fs = NULL; CGroupController c; + CGroupMask ret = 0; int r; assert(p); - if (supported == 0) + if (supported == 0) { + if (ret_result_mask) + *ret_result_mask = 0; return 0; + } r = cg_all_unified(); if (r < 0) return r; - if (r == 0) /* on the legacy hiearchy there's no joining of controllers defined */ + if (r == 0) { + /* On the legacy hiearchy there's no concept of "enabling" controllers in cgroups defined. Let's claim + * complete success right away. (If you wonder why we return the full mask here, rather than zero: the + * caller tends to use the returned mask later on to compare if all controllers where properly joined, + * and if not requeues realization. This use is the primary purpose of the return value, hence let's + * minimize surprises here and reduce triggers for re-realization by always saying we fully + * succeeded.) */ + if (ret_result_mask) + *ret_result_mask = mask & supported & CGROUP_MASK_V2; /* If you wonder why we mask this with + * CGROUP_MASK_V2: The 'supported' mask + * might contain pure-V1 or BPF + * controllers, and we never want to + * claim that we could enable those with + * cgroup.subtree_control */ return 0; + } r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs); if (r < 0) @@ -2644,10 +2667,39 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { log_debug_errno(r, "Failed to %s controller %s for %s (%s): %m", FLAGS_SET(mask, bit) ? "enable" : "disable", n, p, fs); clearerr(f); + + /* If we can't turn off a controller, leave it on in the reported resulting mask. This + * happens for example when we attempt to turn off a controller up in the tree that is + * used down in the tree. */ + if (!FLAGS_SET(mask, bit) && r == -EBUSY) /* You might wonder why we check for EBUSY + * only here, and not follow the same logic + * for other errors such as EINVAL or + * EOPNOTSUPP or anything else. That's + * because EBUSY indicates that the + * controllers is currently enabled and + * cannot be disabled because something down + * the hierarchy is still using it. Any other + * error most likely means something like "I + * never heard of this controller" or + * similar. In the former case it's hence + * safe to assume the controller is still on + * after the failed operation, while in the + * latter case it's safer to assume the + * controller is unknown and hence certainly + * not enabled. */ + ret |= bit; + } else { + /* Otherwise, if we managed to turn on a controller, set the bit reflecting that. */ + if (FLAGS_SET(mask, bit)) + ret |= bit; } } } + /* Let's return the precise set of controllers now enabled for the cgroup. */ + if (ret_result_mask) + *ret_result_mask = ret; + return 0; } diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 7919864df91..ea9a3332904 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -245,7 +245,7 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t callback, void *userdata); int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t callback, void *userdata); int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root); -int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p); +int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p, CGroupMask *ret_result_mask); int cg_mask_supported(CGroupMask *ret); int cg_mask_from_string(const char *s, CGroupMask *ret); diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 7e5ee1b25b9..ec58d7820b4 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1593,8 +1593,8 @@ static int unit_create_cgroup( CGroupMask enable_mask) { CGroupContext *c; - int r; bool created; + int r; assert(u); @@ -1618,18 +1618,37 @@ static int unit_create_cgroup( /* Preserve enabled controllers in delegated units, adjust others. */ if (created || !unit_cgroup_delegate(u)) { + CGroupMask result_mask = 0; /* Enable all controllers we need */ - r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path); + r = cg_enable_everywhere(u->manager->cgroup_supported, enable_mask, u->cgroup_path, &result_mask); if (r < 0) - log_unit_warning_errno(u, r, "Failed to enable controllers on cgroup %s, ignoring: %m", - u->cgroup_path); + log_unit_warning_errno(u, r, "Failed to enable/disable controllers on cgroup %s, ignoring: %m", u->cgroup_path); + + /* If we just turned off a controller, this might release the controller for our parent too, let's + * enqueue the parent for re-realization in that case again. */ + if (UNIT_ISSET(u->slice)) { + CGroupMask turned_off; + + turned_off = (u->cgroup_realized ? u->cgroup_enabled_mask & ~result_mask : 0); + if (turned_off != 0) { + Unit *parent; + + /* Force the parent to propagate the enable mask to the kernel again, by invalidating + * the controller we just turned off. */ + + for (parent = UNIT_DEREF(u->slice); parent; parent = UNIT_DEREF(parent->slice)) + unit_invalidate_cgroup(parent, turned_off); + } + } + + /* Remember what's actually enabled now */ + u->cgroup_enabled_mask = result_mask; } /* Keep track that this is now realized */ u->cgroup_realized = true; u->cgroup_realized_mask = target_mask; - u->cgroup_enabled_mask = enable_mask; if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) { @@ -1815,7 +1834,7 @@ static bool unit_has_mask_realized( u->cgroup_invalidated_mask == 0; } -static void unit_add_to_cgroup_realize_queue(Unit *u) { +void unit_add_to_cgroup_realize_queue(Unit *u) { assert(u); if (u->in_cgroup_realize_queue) diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 2c7287841a1..64dec408ccd 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -151,12 +151,13 @@ CGroupMask unit_get_delegate_mask(Unit *u); CGroupMask unit_get_members_mask(Unit *u); CGroupMask unit_get_siblings_mask(Unit *u); CGroupMask unit_get_subtree_mask(Unit *u); - CGroupMask unit_get_target_mask(Unit *u); CGroupMask unit_get_enable_mask(Unit *u); void unit_update_cgroup_members_masks(Unit *u); +void unit_add_to_cgroup_realize_queue(Unit *u); + const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask); char *unit_default_cgroup_path(Unit *u); int unit_set_cgroup_path(Unit *u, const char *path); diff --git a/src/nspawn/nspawn-cgroup.c b/src/nspawn/nspawn-cgroup.c index 0197474dbc6..53c42f0ee47 100644 --- a/src/nspawn/nspawn-cgroup.c +++ b/src/nspawn/nspawn-cgroup.c @@ -189,7 +189,7 @@ int create_subcgroup(pid_t pid, bool keep_unit, CGroupUnified unified_requested) } /* Try to enable as many controllers as possible for the new payload. */ - (void) cg_enable_everywhere(supported, supported, cgroup); + (void) cg_enable_everywhere(supported, supported, cgroup, NULL); return 0; }