diff --git a/src/core/automount.c b/src/core/automount.c index 5ee0937fa7..0c92616575 100644 --- a/src/core/automount.c +++ b/src/core/automount.c @@ -325,6 +325,9 @@ static void automount_enter_dead(Automount *a, AutomountResult f) { if (a->result == AUTOMOUNT_SUCCESS) a->result = f; + if (a->result != AUTOMOUNT_SUCCESS) + log_unit_warning(UNIT(a), "Failed with result '%s'.", automount_result_to_string(a->result)); + automount_set_state(a, a->result != AUTOMOUNT_SUCCESS ? AUTOMOUNT_FAILED : AUTOMOUNT_DEAD); } diff --git a/src/core/cgroup.c b/src/core/cgroup.c index c2e57a0cdd..85f549c4c4 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1496,9 +1496,9 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { assert(u); - if (u->in_cgroup_queue) { - LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u); - u->in_cgroup_queue = false; + if (u->in_cgroup_realize_queue) { + LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u); + u->in_cgroup_realize_queue = false; } target_mask = unit_get_target_mask(u); @@ -1532,26 +1532,28 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { return 0; } -static void unit_add_to_cgroup_queue(Unit *u) { +static void unit_add_to_cgroup_realize_queue(Unit *u) { assert(u); - if (u->in_cgroup_queue) + if (u->in_cgroup_realize_queue) return; - LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u); - u->in_cgroup_queue = true; + LIST_PREPEND(cgroup_realize_queue, u->manager->cgroup_realize_queue, u); + u->in_cgroup_realize_queue = true; } -unsigned manager_dispatch_cgroup_queue(Manager *m) { +unsigned manager_dispatch_cgroup_realize_queue(Manager *m) { ManagerState state; unsigned n = 0; Unit *i; int r; + assert(m); + state = manager_state(m); - while ((i = m->cgroup_queue)) { - assert(i->in_cgroup_queue); + while ((i = m->cgroup_realize_queue)) { + assert(i->in_cgroup_realize_queue); r = unit_realize_cgroup_now(i, state); if (r < 0) @@ -1563,7 +1565,7 @@ unsigned manager_dispatch_cgroup_queue(Manager *m) { return n; } -static void unit_queue_siblings(Unit *u) { +static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) { Unit *slice; /* This adds the siblings of the specified unit and the @@ -1597,7 +1599,7 @@ static void unit_queue_siblings(Unit *u) { unit_get_needs_bpf(m))) continue; - unit_add_to_cgroup_queue(m); + unit_add_to_cgroup_realize_queue(m); } u = slice; @@ -1622,7 +1624,7 @@ int unit_realize_cgroup(Unit *u) { * iteration. */ /* Add all sibling slices to the cgroup queue. */ - unit_queue_siblings(u); + unit_add_siblings_to_cgroup_realize_queue(u); /* And realize this one now (and apply the values) */ return unit_realize_cgroup_now(u, manager_state(u->manager)); @@ -1792,17 +1794,28 @@ int unit_watch_all_pids(Unit *u) { return unit_watch_pids_in_path(u, u->cgroup_path); } -int unit_notify_cgroup_empty(Unit *u) { +static int on_cgroup_empty_event(sd_event_source *s, void *userdata) { + Manager *m = userdata; + Unit *u; int r; - assert(u); + assert(s); + assert(m); - if (!u->cgroup_path) + u = m->cgroup_empty_queue; + if (!u) return 0; - r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path); - if (r <= 0) - return r; + assert(u->in_cgroup_empty_queue); + u->in_cgroup_empty_queue = false; + LIST_REMOVE(cgroup_empty_queue, m->cgroup_empty_queue, u); + + if (m->cgroup_empty_queue) { + /* More stuff queued, let's make sure we remain enabled */ + r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT); + if (r < 0) + log_debug_errno(r, "Failed to reenable cgroup empty event source: %m"); + } unit_add_to_gc_queue(u); @@ -1812,6 +1825,51 @@ int unit_notify_cgroup_empty(Unit *u) { return 0; } +void unit_add_to_cgroup_empty_queue(Unit *u) { + int r; + + assert(u); + + /* Note that there are four different ways how cgroup empty events reach us: + * + * 1. On the unified hierarchy we get an inotify event on the cgroup + * + * 2. On the legacy hierarchy, when running in system mode, we get a datagram on the cgroup agent socket + * + * 3. On the legacy hierarchy, when running in user mode, we get a D-Bus signal on the system bus + * + * 4. On the legacy hierarchy, in service units we start watching all processes of the cgroup for SIGCHLD as + * soon as we get one SIGCHLD, to deal with unreliable cgroup notifications. + * + * Regardless which way we got the notification, we'll verify it here, and then add it to a separate + * queue. This queue will be dispatched at a lower priority than the SIGCHLD handler, so that we always use + * SIGCHLD if we can get it first, and only use the cgroup empty notifications if there's no SIGCHLD pending + * (which might happen if the cgroup doesn't contain processes that are our own child, which is typically the + * case for scope units). */ + + if (u->in_cgroup_empty_queue) + return; + + /* Let's verify that the cgroup is really empty */ + if (!u->cgroup_path) + return; + r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path); + if (r < 0) { + log_unit_debug_errno(u, r, "Failed to determine whether cgroup %s is empty: %m", u->cgroup_path); + return; + } + if (r == 0) + return; + + LIST_PREPEND(cgroup_empty_queue, u->manager->cgroup_empty_queue, u); + u->in_cgroup_empty_queue = true; + + /* Trigger the defer event */ + r = sd_event_source_set_enabled(u->manager->cgroup_empty_event_source, SD_EVENT_ONESHOT); + if (r < 0) + log_debug_errno(r, "Failed to enable cgroup empty event source: %m"); +} + static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; @@ -1826,7 +1884,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, l = read(fd, &buffer, sizeof(buffer)); if (l < 0) { - if (errno == EINTR || errno == EAGAIN) + if (IN_SET(errno, EINTR, EAGAIN)) return 0; return log_error_errno(errno, "Failed to read control group inotify events: %m"); @@ -1851,7 +1909,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, * this here safely. */ continue; - (void) unit_notify_cgroup_empty(u); + unit_add_to_cgroup_empty_queue(u); } } } @@ -1916,11 +1974,26 @@ int manager_setup_cgroup(Manager *m) { log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path); } - /* 3. Install agent */ + /* 3. Allocate cgroup empty defer event source */ + m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source); + r = sd_event_add_defer(m->event, &m->cgroup_empty_event_source, on_cgroup_empty_event, m); + if (r < 0) + return log_error_errno(r, "Failed to create cgroup empty event source: %m"); + + r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5); + if (r < 0) + return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m"); + + r = sd_event_source_set_enabled(m->cgroup_empty_event_source, SD_EVENT_OFF); + if (r < 0) + return log_error_errno(r, "Failed to disable cgroup empty event source: %m"); + + (void) sd_event_source_set_description(m->cgroup_empty_event_source, "cgroup-empty"); + + /* 4. Install notifier inotify object, or agent */ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) { - /* In the unified hierarchy we can get - * cgroup empty notifications via inotify. */ + /* In the unified hierarchy we can get cgroup empty notifications via inotify. */ m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); safe_close(m->cgroup_inotify_fd); @@ -1935,7 +2008,7 @@ int manager_setup_cgroup(Manager *m) { /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */ - r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5); + r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-4); if (r < 0) return log_error_errno(r, "Failed to set priority of inotify event source: %m"); @@ -1955,33 +2028,31 @@ int manager_setup_cgroup(Manager *m) { log_debug("Release agent already installed."); } - /* 4. Make sure we are in the special "init.scope" unit in the root slice. */ + /* 5. Make sure we are in the special "init.scope" unit in the root slice. */ scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE); r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0); if (r < 0) return log_error_errno(r, "Failed to create %s control group: %m", scope_path); - /* also, move all other userspace processes remaining - * in the root cgroup into that scope. */ + /* Also, move all other userspace processes remaining in the root cgroup into that scope. */ r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0); if (r < 0) log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m"); - /* 5. And pin it, so that it cannot be unmounted */ + /* 6. And pin it, so that it cannot be unmounted */ safe_close(m->pin_cgroupfs_fd); m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK); if (m->pin_cgroupfs_fd < 0) return log_error_errno(errno, "Failed to open pin file: %m"); - /* 6. Always enable hierarchical support if it exists... */ + /* 7. Always enable hierarchical support if it exists... */ if (!all_unified && m->test_run_flags == 0) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); - /* 7. Figure out which controllers are supported */ + /* 8. Figure out which controllers are supported, and log about it */ r = cg_mask_supported(&m->cgroup_supported); if (r < 0) return log_error_errno(r, "Failed to determine supported controllers: %m"); - for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c))); @@ -1996,6 +2067,8 @@ void manager_shutdown_cgroup(Manager *m, bool delete) { if (delete && m->cgroup_root) (void) cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false); + m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source); + m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit); m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); @@ -2077,13 +2150,17 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) { assert(m); assert(cgroup); + /* Called on the legacy hierarchy whenever we get an explicit cgroup notification from the cgroup agent process + * or from the --system instance */ + log_debug("Got cgroup empty notification for: %s", cgroup); u = manager_get_unit_by_cgroup(m, cgroup); if (!u) return 0; - return unit_notify_cgroup_empty(u); + unit_add_to_cgroup_empty_queue(u); + return 1; } int unit_get_memory_current(Unit *u, uint64_t *ret) { @@ -2329,7 +2406,7 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) { return; u->cgroup_realized_mask &= ~m; - unit_add_to_cgroup_queue(u); + unit_add_to_cgroup_realize_queue(u); } void unit_invalidate_cgroup_bpf(Unit *u) { @@ -2342,7 +2419,7 @@ void unit_invalidate_cgroup_bpf(Unit *u) { return; u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED; - unit_add_to_cgroup_queue(u); + unit_add_to_cgroup_realize_queue(u); /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access * list of our children includes our own. */ diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 10ac322aed..65245fbc43 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -172,12 +172,14 @@ void unit_release_cgroup(Unit *u); void unit_prune_cgroup(Unit *u); int unit_watch_cgroup(Unit *u); +void unit_add_to_cgroup_empty_queue(Unit *u); + int unit_attach_pids_to_cgroup(Unit *u); int manager_setup_cgroup(Manager *m); void manager_shutdown_cgroup(Manager *m, bool delete); -unsigned manager_dispatch_cgroup_queue(Manager *m); +unsigned manager_dispatch_cgroup_realize_queue(Manager *m); Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup); Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid); @@ -200,7 +202,6 @@ int unit_reset_ip_accounting(Unit *u); cc ? cc->name : false; \ }) -int unit_notify_cgroup_empty(Unit *u); int manager_notify_cgroup_empty(Manager *m, const char *group); void unit_invalidate_cgroup(Unit *u, CGroupMask m); diff --git a/src/core/manager.c b/src/core/manager.c index 5cf4bc4ee6..8ebea8d66b 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -859,7 +859,7 @@ static int manager_setup_cgroups_agent(Manager *m) { * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification, * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of * cgroup inotify for the unified cgroup stuff. */ - r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-5); + r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-4); if (r < 0) return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m"); @@ -2367,7 +2367,7 @@ int manager_loop(Manager *m) { if (manager_dispatch_cleanup_queue(m) > 0) continue; - if (manager_dispatch_cgroup_queue(m) > 0) + if (manager_dispatch_cgroup_realize_queue(m) > 0) continue; if (manager_dispatch_dbus_queue(m) > 0) diff --git a/src/core/manager.h b/src/core/manager.h index e8a6267471..c909916a1e 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -119,7 +119,10 @@ struct Manager { LIST_HEAD(Job, gc_job_queue); /* Units that should be realized */ - LIST_HEAD(Unit, cgroup_queue); + LIST_HEAD(Unit, cgroup_realize_queue); + + /* Units whose cgroup ran empty */ + LIST_HEAD(Unit, cgroup_empty_queue); sd_event *event; @@ -229,12 +232,14 @@ struct Manager { CGroupMask cgroup_supported; char *cgroup_root; - /* Notifications from cgroups, when the unified hierarchy is - * used is done via inotify. */ + /* Notifications from cgroups, when the unified hierarchy is used is done via inotify. */ int cgroup_inotify_fd; sd_event_source *cgroup_inotify_event_source; Hashmap *cgroup_inotify_wd_unit; + /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */ + sd_event_source *cgroup_empty_event_source; + /* Make sure the user cannot accidentally unmount our cgroup * file system */ int pin_cgroupfs_fd; diff --git a/src/core/mount.c b/src/core/mount.c index 22b3608a2f..903b3a9c1b 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -796,6 +796,9 @@ static void mount_enter_dead(Mount *m, MountResult f) { if (m->result == MOUNT_SUCCESS) m->result = f; + if (m->result != MOUNT_SUCCESS) + log_unit_warning(UNIT(m), "Failed with result '%s'.", mount_result_to_string(m->result)); + mount_set_state(m, m->result != MOUNT_SUCCESS ? MOUNT_FAILED : MOUNT_DEAD); exec_runtime_destroy(m->exec_runtime); diff --git a/src/core/path.c b/src/core/path.c index 83f794be89..7092ed13ba 100644 --- a/src/core/path.c +++ b/src/core/path.c @@ -457,6 +457,9 @@ static void path_enter_dead(Path *p, PathResult f) { if (p->result == PATH_SUCCESS) p->result = f; + if (p->result != PATH_SUCCESS) + log_unit_warning(UNIT(p), "Failed with result '%s'.", path_result_to_string(p->result)); + path_set_state(p, p->result != PATH_SUCCESS ? PATH_FAILED : PATH_DEAD); } diff --git a/src/core/scope.c b/src/core/scope.c index 8f9df3b9b7..9670228694 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -253,6 +253,9 @@ static void scope_enter_dead(Scope *s, ScopeResult f) { if (s->result == SCOPE_SUCCESS) s->result = f; + if (s->result != SCOPE_SUCCESS) + log_unit_warning(UNIT(s), "Failed with result '%s'.", scope_result_to_string(s->result)); + scope_set_state(s, s->result != SCOPE_SUCCESS ? SCOPE_FAILED : SCOPE_DEAD); } diff --git a/src/core/service.c b/src/core/service.c index 38a8010232..1a4455bd22 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1512,12 +1512,13 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) if (s->result == SERVICE_SUCCESS) s->result = f; + if (s->result != SERVICE_SUCCESS) + log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result)); + service_set_state(s, s->result != SERVICE_SUCCESS ? SERVICE_FAILED : SERVICE_DEAD); - if (s->result != SERVICE_SUCCESS) { - log_unit_warning(UNIT(s), "Failed with result '%s'.", service_result_to_string(s->result)); + if (s->result != SERVICE_SUCCESS) emergency_action(UNIT(s)->manager, s->emergency_action, UNIT(s)->reboot_arg, "service failed"); - } if (allow_restart && service_shall_restart(s)) { @@ -3213,7 +3214,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { /* If the PID set is empty now, then let's finish this off (On unified we use proper notifications) */ if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) == 0 && set_isempty(u->pids)) - service_notify_cgroup_empty_event(u); + unit_add_to_cgroup_empty_queue(u); } static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) { diff --git a/src/core/socket.c b/src/core/socket.c index ba70756d83..e05daaa1dc 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -2000,6 +2000,9 @@ static void socket_enter_dead(Socket *s, SocketResult f) { if (s->result == SOCKET_SUCCESS) s->result = f; + if (s->result != SOCKET_SUCCESS) + log_unit_warning(UNIT(s), "Failed with result '%s'.", socket_result_to_string(s->result)); + socket_set_state(s, s->result != SOCKET_SUCCESS ? SOCKET_FAILED : SOCKET_DEAD); exec_runtime_destroy(s->exec_runtime); diff --git a/src/core/swap.c b/src/core/swap.c index 6db1c0df69..f475755572 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -665,6 +665,9 @@ static void swap_enter_dead(Swap *s, SwapResult f) { if (s->result == SWAP_SUCCESS) s->result = f; + if (s->result != SWAP_SUCCESS) + log_unit_warning(UNIT(s), "Failed with result '%s'.", swap_result_to_string(s->result)); + swap_set_state(s, s->result != SWAP_SUCCESS ? SWAP_FAILED : SWAP_DEAD); exec_runtime_destroy(s->exec_runtime); diff --git a/src/core/timer.c b/src/core/timer.c index 3032a237b1..a34bc7b14e 100644 --- a/src/core/timer.c +++ b/src/core/timer.c @@ -296,6 +296,9 @@ static void timer_enter_dead(Timer *t, TimerResult f) { if (t->result == TIMER_SUCCESS) t->result = f; + if (t->result != TIMER_SUCCESS) + log_unit_warning(UNIT(t), "Failed with result '%s'.", timer_result_to_string(t->result)); + timer_set_state(t, t->result != TIMER_SUCCESS ? TIMER_FAILED : TIMER_DEAD); } diff --git a/src/core/unit.c b/src/core/unit.c index 6d9b85fb85..60cf30ea3a 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -314,22 +314,16 @@ int unit_choose_id(Unit *u, const char *name) { } int unit_set_description(Unit *u, const char *description) { - char *s; + int r; assert(u); - if (isempty(description)) - s = NULL; - else { - s = strdup(description); - if (!s) - return -ENOMEM; - } + r = free_and_strdup(&u->description, empty_to_null(description)); + if (r < 0) + return r; + if (r > 0) + unit_add_to_dbus_queue(u); - free(u->description); - u->description = s; - - unit_add_to_dbus_queue(u); return 0; } @@ -588,8 +582,11 @@ void unit_free(Unit *u) { if (u->in_gc_queue) LIST_REMOVE(gc_queue, u->manager->gc_unit_queue, u); - if (u->in_cgroup_queue) - LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u); + if (u->in_cgroup_realize_queue) + LIST_REMOVE(cgroup_realize_queue, u->manager->cgroup_realize_queue, u); + + if (u->in_cgroup_empty_queue) + LIST_REMOVE(cgroup_empty_queue, u->manager->cgroup_empty_queue, u); unit_release_cgroup(u); @@ -2275,7 +2272,7 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su check_unneeded_dependencies(u); if (ns != os && ns == UNIT_FAILED) { - log_unit_notice(u, "Unit entered failed state."); + log_unit_debug(u, "Unit entered failed state."); unit_start_on_failure(u); } } diff --git a/src/core/unit.h b/src/core/unit.h index 9aa00b056f..7c6fef27b3 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -166,10 +166,10 @@ struct Unit { LIST_FIELDS(Unit, gc_queue); /* CGroup realize members queue */ - LIST_FIELDS(Unit, cgroup_queue); + LIST_FIELDS(Unit, cgroup_realize_queue); - /* Units with the same CGroup netclass */ - LIST_FIELDS(Unit, cgroup_netclass); + /* cgroup empty queue */ + LIST_FIELDS(Unit, cgroup_empty_queue); /* PIDs we keep an eye on. Note that a unit might have many * more, but these are the ones we care enough about to @@ -266,7 +266,8 @@ struct Unit { bool in_dbus_queue:1; bool in_cleanup_queue:1; bool in_gc_queue:1; - bool in_cgroup_queue:1; + bool in_cgroup_realize_queue:1; + bool in_cgroup_empty_queue:1; bool sent_dbus_new_signal:1;