From 5162829ec87df20c7af763bdf274735bf9e53552 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 29 Apr 2024 16:14:12 +0100 Subject: [PATCH] core: do BindMount/MountImage operations in async control process These operations might require slow I/O, and thus might block PID1's main loop for an indeterminate amount of time. Instead of performing them inline, fork a worker process and stash away the D-Bus message, and reply once we get a SIGCHLD indicating they have completed. That way we don't break compatibility and callers can continue to rely on the fact that when they get the method reply the operation either succeeded or failed. To keep backward compatibility, unlike reload control processes, these are run inside init.scope and not the target cgroup. Unlike ExecReload, this is under our control and is not defined by the unit. This is necessary because previously the operation also wasn't run from the target cgroup, so suddenly forking a copy-on-write copy of pid1 into the target cgroup will make memory usage spike, and if there is a MemoryMax= or MemoryHigh= set and the cgroup is already close to the limit, it will cause an OOM kill, where previously it would have worked fine. 
--- man/org.freedesktop.systemd1.xml | 15 +- man/systemctl.xml | 3 +- man/systemd.xml | 34 +-- src/basic/unit-def.c | 3 + src/basic/unit-def.h | 2 + src/core/dbus-service.c | 60 ++--- src/core/dbus-unit.c | 18 ++ src/core/job.c | 4 +- src/core/manager.c | 1 + src/core/scope.c | 2 +- src/core/service.c | 241 +++++++++++++++++- src/core/service.h | 4 + src/core/socket.c | 6 +- src/core/unit.c | 100 +++++++- src/core/unit.h | 14 +- src/machine/machine-dbus.c | 9 +- src/shared/bus-wait-for-units.c | 12 +- src/shared/mount-util.c | 64 ++--- src/shared/mount-util.h | 12 +- src/systemctl/systemctl-is-active.c | 1 + src/systemctl/systemctl-list-dependencies.c | 1 + src/systemctl/systemctl-list-units.c | 2 +- src/systemctl/systemctl-show.c | 12 +- src/systemctl/systemctl-util.c | 2 +- .../TEST-23-UNIT-FILE.runtime-bind-paths.sh | 4 + 25 files changed, 480 insertions(+), 146 deletions(-) diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index d445c138fa0..a5c98d3458a 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2046,6 +2046,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { readonly as CanClean = ['...', ...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b CanFreeze = ...; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly b CanLiveMount = ...; readonly (uo) Job = ...; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly b StopWhenUnneeded = ...; @@ -2178,6 +2180,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -2382,6 +2386,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -2761,6 +2767,7 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { readonly s Result = '...'; readonly s ReloadResult = '...'; readonly s CleanResult = '...'; + readonly s LiveMountResult = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s USBFunctionDescriptors = 
'...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -3425,6 +3432,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -4055,6 +4064,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -12090,7 +12101,8 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ QueueSignal() was added in version 254. SurviveFinalKillSignal was added in version 255. WantsMountsFor was added in version 256. - DebugInvocation was added in version 257. + DebugInvocation, and + CanLiveMount were added in version 257. Service Unit Objects @@ -12136,6 +12148,7 @@ $ gdbus introspect --system --dest org.freedesktop.systemd1 \ ExecMainHandoffTimestamp were added in version 256. StatusBusError, StatusVarlinkError, + LiveMountResult, PrivateTmpEx, and ImportCredentialEx were added in version 257. diff --git a/man/systemctl.xml b/man/systemctl.xml index 8561e169442..11da7a9b73c 100644 --- a/man/systemctl.xml +++ b/man/systemctl.xml @@ -291,7 +291,8 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err inactive or maintenance is a white circle ("○"), active is a green dot ("●"), deactivating is a white dot, failed or error is a red cross ("×"), and - reloading is a green clockwise circle arrow ("↻"). + reloading or refreshing is a green clockwise circle arrow + ("↻"). The "Loaded:" line in the output will show loaded if the unit has been loaded into memory. Other possible values for "Loaded:" include: error if diff --git a/man/systemd.xml b/man/systemd.xml index 58e76eecb27..ddd190093eb 100644 --- a/man/systemd.xml +++ b/man/systemd.xml @@ -89,28 +89,22 @@ Units - systemd provides a dependency system between various - entities called "units" of 11 different types. Units encapsulate - various objects that are relevant for system boot-up and - maintenance. 
The majority of units are configured in unit - configuration files, whose syntax and basic set of options is + systemd provides a dependency system between various entities called "units" of 11 different + types. Units encapsulate various objects that are relevant for system boot-up and maintenance. The + majority of units are configured in unit configuration files, whose syntax and basic set of options is described in systemd.unit5, - however some are created automatically from other configuration - files, dynamically from system state or programmatically at runtime. - Units may be "active" (meaning started, bound, plugged in, …, - depending on the unit type, see below), or "inactive" (meaning - stopped, unbound, unplugged, …), as well as in the process of - being activated or deactivated, i.e. between the two states (these - states are called "activating", "deactivating"). A special - "failed" state is available as well, which is very similar to - "inactive" and is entered when the service failed in some way - (process returned error code on exit, or crashed, an operation - timed out, or after too many restarts). If this state is entered, - the cause will be logged, for later reference. Note that the - various unit types may have a number of additional substates, - which are mapped to the five generalized unit states described - here. + however some are created automatically from other configuration files, dynamically from system state or + programmatically at runtime. Units may be "active" (meaning started, bound, plugged in, …, depending on + the unit type, see below), or "inactive" (meaning stopped, unbound, unplugged, …), as well as in the + process of being activated or deactivated, i.e. between the two states (these states are called + "activating", "deactivating"). 
A special "failed" state is available as well, which is very similar to + "inactive" and is entered when the service failed in some way (process returned error code on exit, or + crashed, an operation timed out, or after too many restarts). If this state is entered, the cause will + be logged, for later reference. Units may also be in a special transient state for a time, to indicate + that some operation is being performed on them, before reverting to the previous state, such as + "maintenance", "reloading" or "refreshing". Note that the various unit types may have a number of + additional substates, which are mapped to the five generalized unit states described here. The following unit types are available: diff --git a/src/basic/unit-def.c b/src/basic/unit-def.c index 4dc8ceef865..e3292a8a447 100644 --- a/src/basic/unit-def.c +++ b/src/basic/unit-def.c @@ -112,6 +112,7 @@ static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = { [UNIT_ACTIVATING] = "activating", [UNIT_DEACTIVATING] = "deactivating", [UNIT_MAINTENANCE] = "maintenance", + [UNIT_REFRESHING] = "refreshing", }; DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState); @@ -234,6 +235,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = { [SERVICE_AUTO_RESTART] = "auto-restart", [SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued", [SERVICE_CLEANING] = "cleaning", + [SERVICE_MOUNTING] = "mounting", }; DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState); @@ -344,6 +346,7 @@ SpecialGlyph unit_active_state_to_glyph(UnitActiveState state) { static const SpecialGlyph map[_UNIT_ACTIVE_STATE_MAX] = { [UNIT_ACTIVE] = SPECIAL_GLYPH_BLACK_CIRCLE, [UNIT_RELOADING] = SPECIAL_GLYPH_CIRCLE_ARROW, + [UNIT_REFRESHING] = SPECIAL_GLYPH_CIRCLE_ARROW, [UNIT_INACTIVE] = SPECIAL_GLYPH_WHITE_CIRCLE, [UNIT_FAILED] = SPECIAL_GLYPH_MULTIPLICATION_SIGN, [UNIT_ACTIVATING] = SPECIAL_GLYPH_BLACK_CIRCLE, diff --git a/src/basic/unit-def.h b/src/basic/unit-def.h index 
cf74c9de2f2..d022983bdca 100644 --- a/src/basic/unit-def.h +++ b/src/basic/unit-def.h @@ -47,6 +47,7 @@ typedef enum UnitActiveState { UNIT_ACTIVATING, UNIT_DEACTIVATING, UNIT_MAINTENANCE, + UNIT_REFRESHING, _UNIT_ACTIVE_STATE_MAX, _UNIT_ACTIVE_STATE_INVALID = -EINVAL, } UnitActiveState; @@ -137,6 +138,7 @@ typedef enum ServiceState { SERVICE_RELOAD, /* Reloading via ExecReload= */ SERVICE_RELOAD_SIGNAL, /* Reloading via SIGHUP requested */ SERVICE_RELOAD_NOTIFY, /* Waiting for READY=1 after RELOADING=1 notify */ + SERVICE_MOUNTING, /* Performing a live mount into the namespace of the service */ SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */ SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 8a144519086..43a8fb06175 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -19,7 +19,6 @@ #include "fileio.h" #include "locale-util.h" #include "missing_fcntl.h" -#include "mount-util.h" #include "open-file.h" #include "parse-util.h" #include "path-util.h" @@ -130,6 +129,7 @@ static int property_get_exit_status_set( } static int bus_service_method_mount(sd_bus_message *message, void *userdata, sd_bus_error *error, bool is_image) { + MountInNamespaceFlags flags = 0; Unit *u = ASSERT_PTR(userdata); int r; @@ -138,8 +138,9 @@ static int bus_service_method_mount(sd_bus_message *message, void *userdata, sd_ if (!MANAGER_IS_SYSTEM(u->manager)) return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED, "Adding bind mounts at runtime is only supported by system manager"); - if (u->type != UNIT_SERVICE) - return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Unit is not of type .service"); + r = unit_can_live_mount(u, error); + if (r < 0) + return r; r = mac_selinux_unit_access_check(u, message, "start", error); if (r < 0) @@ -178,50 +179,18 @@ static int bus_service_method_mount(sd_bus_message *message, void *userdata, sd_ if (r == 0) return 1; /* No 
authorization for now, but the async polkit stuff will call us again when it has it */ - const PidRef *unit_pid = unit_main_pid(u); - if (!pidref_is_set(unit_pid) || !UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) - return sd_bus_error_set(error, BUS_ERROR_UNIT_INACTIVE, "Unit is not running"); - - /* The context should always be available, but there's an assert in exec_needs_mount_namespace, - * so double-check just in case. */ - ExecContext *c = unit_get_exec_context(u); - if (!c) - return -ENXIO; - - /* Ensure that the unit was started in a private mount namespace */ - if (!exec_needs_mount_namespace(c, NULL, unit_get_exec_runtime(u))) - return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Unit not running in private mount namespace, cannot activate bind mount"); - - if (mount_point_is_credentials(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], dest)) - return sd_bus_error_set(error, SD_BUS_ERROR_INVALID_ARGS, "Refusing to bind mount over credential mounts"); - - /* If it would be dropped at startup time, return an error. 
*/ - if (path_startswith_strv(dest, c->inaccessible_paths)) - return sd_bus_error_setf(error, SD_BUS_ERROR_ACCESS_DENIED, "%s is not accessible to this unit", dest); - - const char *propagate_directory = strjoina("/run/systemd/propagate/", u->id); if (is_image) - r = mount_image_in_namespace( - unit_pid, - propagate_directory, - "/run/systemd/incoming/", - src, dest, - read_only, - make_file_or_directory, - options, - c->mount_image_policy ?: &image_policy_service); - else - r = bind_mount_in_namespace( - unit_pid, - propagate_directory, - "/run/systemd/incoming/", - src, dest, - read_only, - make_file_or_directory); - if (r < 0) - return sd_bus_error_set_errnof(error, r, "Failed to mount '%s' on '%s' in unit's namespace: %m", src, dest); + flags |= MOUNT_IN_NAMESPACE_IS_IMAGE; + if (read_only) + flags |= MOUNT_IN_NAMESPACE_READ_ONLY; + if (make_file_or_directory) + flags |= MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY; - return sd_bus_reply_method_return(message, NULL); + r = unit_live_mount(u, src, dest, message, flags, options, error); + if (r < 0) + return r; + + return 1; } int bus_service_method_bind_mount(sd_bus_message *message, void *userdata, sd_bus_error *error) { @@ -362,6 +331,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Service, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("ReloadResult", "s", property_get_result, offsetof(Service, reload_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("CleanResult", "s", property_get_result, offsetof(Service, clean_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("LiveMountResult", "s", property_get_result, offsetof(Service, live_mount_result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("USBFunctionDescriptors", "s", NULL, offsetof(Service, usb_function_descriptors), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("USBFunctionStrings", "s", NULL, offsetof(Service, usb_function_strings), 
SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index f1136a95b78..d7869f115a1 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -89,6 +89,23 @@ static int property_get_can_clean( return sd_bus_message_close_container(reply); } +static int property_get_can_live_mount( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + Unit *u = ASSERT_PTR(userdata); + + assert(bus); + assert(reply); + + return sd_bus_message_append(reply, "b", unit_can_live_mount(u, /* error= */ NULL) >= 0); +} + static int property_get_names( sd_bus *bus, const char *path, @@ -882,6 +899,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("CanIsolate", "b", property_get_can_isolate, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CanClean", "as", property_get_can_clean, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CanFreeze", "b", property_get_can_freeze, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("CanLiveMount", "b", property_get_can_live_mount, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Job", "(uo)", property_get_job, offsetof(Unit, job), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("StopWhenUnneeded", "b", bus_property_get_bool, offsetof(Unit, stop_when_unneeded), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RefuseManualStart", "b", bus_property_get_bool, offsetof(Unit, refuse_manual_start), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/job.c b/src/core/job.c index 21083497c09..468571ae71a 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -425,13 +425,13 @@ bool job_type_is_redundant(JobType a, UnitActiveState b) { switch (a) { case JOB_START: - return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING); + return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING, UNIT_REFRESHING); case JOB_STOP: 
return IN_SET(b, UNIT_INACTIVE, UNIT_FAILED); case JOB_VERIFY_ACTIVE: - return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING); + return IN_SET(b, UNIT_ACTIVE, UNIT_RELOADING, UNIT_REFRESHING); case JOB_RELOAD: return diff --git a/src/core/manager.c b/src/core/manager.c index 499ecec88e6..f43bfd214ba 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -1890,6 +1890,7 @@ static bool manager_dbus_is_running(Manager *m, bool deserialized) { return false; if (!IN_SET((deserialized ? SERVICE(u)->deserialized_state : SERVICE(u)->state), SERVICE_RUNNING, + SERVICE_MOUNTING, SERVICE_RELOAD, SERVICE_RELOAD_NOTIFY, SERVICE_RELOAD_SIGNAL)) diff --git a/src/core/scope.c b/src/core/scope.c index cfa2aeb03f6..6e66b56dcba 100644 --- a/src/core/scope.c +++ b/src/core/scope.c @@ -356,7 +356,7 @@ static int scope_enter_start_chown(Scope *s) { if (r < 0) return r; - r = unit_fork_helper_process(u, "(sd-chown-cgroup)", &pidref); + r = unit_fork_helper_process(u, "(sd-chown-cgroup)", /* into_cgroup= */ true, &pidref); if (r < 0) goto fail; diff --git a/src/core/service.c b/src/core/service.c index ed3a46ff8c2..663fdb30f2c 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -10,6 +10,7 @@ #include "alloc-util.h" #include "async.h" +#include "bus-common-errors.h" #include "bus-error.h" #include "bus-kernel.h" #include "bus-util.h" @@ -31,6 +32,7 @@ #include "log.h" #include "manager.h" #include "missing_audit.h" +#include "mount-util.h" #include "open-file.h" #include "parse-util.h" #include "path-util.h" @@ -77,6 +79,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = { [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, [SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, + [SERVICE_MOUNTING] = UNIT_REFRESHING, }; /* For Type=idle we never want to delay any other jobs, hence we @@ -107,6 +110,7 @@ static const UnitActiveState state_translation_table_idle[_SERVICE_STATE_MAX] = [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING, 
[SERVICE_AUTO_RESTART_QUEUED] = UNIT_ACTIVATING, [SERVICE_CLEANING] = UNIT_MAINTENANCE, + [SERVICE_MOUNTING] = UNIT_REFRESHING, }; static int service_dispatch_inotify_io(sd_event_source *source, int fd, uint32_t events, void *userdata); @@ -122,6 +126,7 @@ static bool SERVICE_STATE_WITH_MAIN_PROCESS(ServiceState state) { SERVICE_START, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, + SERVICE_MOUNTING, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL); } @@ -133,7 +138,7 @@ static bool SERVICE_STATE_WITH_CONTROL_PROCESS(ServiceState state) { SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, - SERVICE_CLEANING); + SERVICE_CLEANING, SERVICE_MOUNTING); } static void service_init(Unit *u) { @@ -505,6 +510,8 @@ static void service_done(Unit *u) { service_release_socket_fd(s); service_release_stdio_fd(s); service_release_fd_store(s); + + s->mount_request = sd_bus_message_unref(s->mount_request); } static int on_fd_store_io(sd_event_source *e, int fd, uint32_t revents, void *userdata) { @@ -944,6 +951,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { "%sResult: %s\n" "%sReload Result: %s\n" "%sClean Result: %s\n" + "%sMount Result: %s\n" "%sPermissionsStartOnly: %s\n" "%sRootDirectoryStartOnly: %s\n" "%sRemainAfterExit: %s\n" @@ -958,6 +966,7 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, service_result_to_string(s->result), prefix, service_result_to_string(s->reload_result), prefix, service_result_to_string(s->clean_result), + prefix, service_result_to_string(s->live_mount_result), prefix, yes_no(s->permissions_start_only), prefix, yes_no(s->root_directory_start_only), prefix, 
yes_no(s->remain_after_exit), @@ -1252,6 +1261,7 @@ static void service_set_state(Service *s, ServiceState state) { SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST, SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_AUTO_RESTART, + SERVICE_MOUNTING, SERVICE_CLEANING)) s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source); @@ -1277,7 +1287,7 @@ static void service_set_state(Service *s, ServiceState state) { if (state != SERVICE_START) s->exec_fd_event_source = sd_event_source_disable_unref(s->exec_fd_event_source); - if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY)) + if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING)) service_stop_watchdog(s); if (state == SERVICE_EXITED && !MANAGER_IS_RELOADING(u->manager)) { @@ -1301,6 +1311,9 @@ static void service_set_state(Service *s, ServiceState state) { unit_destroy_runtime_data(u, &s->exec_context); } + if (state != SERVICE_MOUNTING) /* Just in case */ + s->mount_request = sd_bus_message_unref(s->mount_request); + if (old_state != state) log_unit_debug(u, "Changed %s -> %s", service_state_to_string(old_state), service_state_to_string(state)); @@ -1342,6 +1355,9 @@ static usec_t service_coldplug_timeout(Service *s) { case SERVICE_CLEANING: return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->exec_context.timeout_clean_usec); + case SERVICE_MOUNTING: + return usec_add(UNIT(s)->state_change_timestamp.monotonic, s->timeout_start_usec); + default: return USEC_INFINITY; } @@ -1386,7 +1402,7 @@ static int service_coldplug(Unit *u) { (void) unit_setup_exec_runtime(u); } - if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY)) + if (IN_SET(s->deserialized_state, SERVICE_START_POST, 
SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, SERVICE_MOUNTING)) service_start_watchdog(s); if (UNIT_ISSET(s->accept_socket)) { @@ -2853,6 +2869,32 @@ static int service_start(Unit *u) { return 1; } +static void service_mount_request_reply(Service *s, bool success, const char *error) { + assert(s); + assert(error); + + if (!s->mount_request) + return; + + if (success) { + (void) sd_bus_reply_method_return(s->mount_request, NULL); + log_unit_debug(UNIT(s), + "'%s' method succeeded", + strna(sd_bus_message_get_member(s->mount_request))); + } else { + (void) sd_bus_reply_method_errorf(s->mount_request, error, + "method '%s' for unit '%s' failed", + strna(sd_bus_message_get_member(s->mount_request)), + UNIT(s)->id); + log_unit_debug(UNIT(s), + "'%s' method failed: %s", + strna(sd_bus_message_get_member(s->mount_request)), + error); + } + + s->mount_request = sd_bus_message_unref(s->mount_request); +} + static int service_stop(Unit *u) { Service *s = ASSERT_PTR(SERVICE(u)); @@ -2877,6 +2919,10 @@ static int service_stop(Unit *u) { service_set_state(s, service_determine_dead_state(s)); return 0; + case SERVICE_MOUNTING: + service_kill_control_process(s); + service_mount_request_reply(s, /* success= */ false, BUS_ERROR_UNIT_INACTIVE); + _fallthrough_; case SERVICE_CONDITION: case SERVICE_START_PRE: case SERVICE_START: @@ -3860,6 +3906,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { case SERVICE_RELOAD: case SERVICE_RELOAD_SIGNAL: case SERVICE_RELOAD_NOTIFY: + case SERVICE_MOUNTING: /* If neither main nor control processes are running then the current * state can never exit cleanly, hence immediately terminate the * service. 
*/ @@ -3974,7 +4021,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { success, code, status); - if (s->state != SERVICE_RELOAD && s->result == SERVICE_SUCCESS) + if (!IN_SET(s->state, SERVICE_RELOAD, SERVICE_MOUNTING) && s->result == SERVICE_SUCCESS) s->result = f; if (s->control_command && @@ -4114,6 +4161,14 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { service_enter_dead(s, SERVICE_SUCCESS, false); break; + case SERVICE_MOUNTING: + s->live_mount_result = f; + + service_mount_request_reply(s, f == SERVICE_SUCCESS, SD_BUS_ERROR_FAILED); + + service_enter_running(s, SERVICE_SUCCESS); + break; + default: assert_not_reached(); } @@ -4187,6 +4242,14 @@ static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *us service_enter_running(s, SERVICE_SUCCESS); break; + case SERVICE_MOUNTING: + log_unit_warning(UNIT(s), "Mount operation timed out. Killing mount process."); + service_kill_control_process(s); + s->live_mount_result = SERVICE_FAILURE_TIMEOUT; + service_mount_request_reply(s, /* success= */ false, SD_BUS_ERROR_TIMEOUT); + service_enter_running(s, SERVICE_SUCCESS); + break; + case SERVICE_STOP: switch (s->timeout_stop_failure_mode) { @@ -4743,7 +4806,8 @@ static bool pick_up_pid_from_bus_name(Service *s) { SERVICE_RUNNING, SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, - SERVICE_RELOAD_NOTIFY); + SERVICE_RELOAD_NOTIFY, + SERVICE_MOUNTING); } static int bus_name_pid_lookup_callback(sd_bus_message *reply, void *userdata, sd_bus_error *ret_error) { @@ -4892,6 +4956,7 @@ static void service_reset_failed(Unit *u) { s->result = SERVICE_SUCCESS; s->reload_result = SERVICE_SUCCESS; s->clean_result = SERVICE_SUCCESS; + s->live_mount_result = SERVICE_SUCCESS; s->n_restarts = 0; service_set_debug_invocation(s, /* enable= */ false); @@ -4928,6 +4993,7 @@ static bool service_needs_console(Unit *u) { SERVICE_RELOAD, SERVICE_RELOAD_SIGNAL, SERVICE_RELOAD_NOTIFY, + SERVICE_MOUNTING, SERVICE_STOP, 
SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, @@ -5035,6 +5101,168 @@ static int service_can_clean(Unit *u, ExecCleanMask *ret) { return 0; } +static int service_live_mount(Unit *u, + const char *src, + const char *dst, + sd_bus_message *message, + MountInNamespaceFlags flags, + const MountOptions *options, + sd_bus_error *error) { + + _cleanup_(pidref_done) PidRef worker = PIDREF_NULL; + Service *s = ASSERT_PTR(SERVICE(u)); + const char *propagate_directory; + int r; + + assert(u); + assert(u->manager); + assert(src); + assert(dst); + assert(message); + assert(!s->mount_request); + + if (s->state != SERVICE_RUNNING || !pidref_is_set(&s->main_pid)) { + log_unit_warning(u, "Service is not running, cannot live mount"); + return sd_bus_error_setf( + error, + BUS_ERROR_UNIT_INACTIVE, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: service not running", + src, + dst, + u->id); + } + + if (mount_point_is_credentials(u->manager->prefix[EXEC_DIRECTORY_RUNTIME], dst)) { + log_unit_warning(u, "Refusing to live mount over credential mount '%s'", dst); + return sd_bus_error_setf( + error, + SD_BUS_ERROR_INVALID_ARGS, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: cannot mount over credential mount", + src, + dst, + u->id); + } + + if (path_startswith_strv(dst, s->exec_context.inaccessible_paths)) { + log_unit_warning(u, "%s is not accessible to this unit, cannot live mount", dst); + return sd_bus_error_setf( + error, + SD_BUS_ERROR_INVALID_ARGS, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: destination is not accessible to this unit", + src, + dst, + u->id); + } + + service_unwatch_control_pid(s); + s->live_mount_result = SERVICE_SUCCESS; + s->control_command = NULL; + s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID; + + r = service_arm_timer(s, /* relative= */ true, s->timeout_start_usec); + if (r < 0) { + log_unit_warning_errno(u, r, "Failed to install timer: %m"); + sd_bus_error_set_errnof( + error, + r, + "Live 
mounting '%s' on '%s' for unit '%s' cannot be scheduled: failed to install timer", + src, + dst, + u->id); + goto fail; + } + + propagate_directory = strjoina("/run/systemd/propagate/", u->id); + + /* Given we are running from PID1, avoid doing potentially heavy I/O operations like opening images + * directly, and instead fork a worker process. We record the D-Bus message, so that we can reply + * after the operation has finished. This way callers can wait on the message and know that the new + * resource is available (or the operation failed) once they receive the response. */ + r = unit_fork_helper_process(u, "(sd-mount-in-ns)", /* into_cgroup= */ false, &worker); + if (r < 0) { + log_unit_warning_errno( + u, + r, + "Failed to fork process to mount '%s' on '%s' in unit's namespace: %m", + src, + dst); + sd_bus_error_set_errnof( + error, + r, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: failed to fork process", + src, + dst, + u->id); + goto fail; + } + if (r == 0) { + if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) + r = mount_image_in_namespace( + &s->main_pid, + propagate_directory, + "/run/systemd/incoming/", + src, dst, + flags, + options, + s->exec_context.mount_image_policy ?: &image_policy_service); + else + r = bind_mount_in_namespace( + &s->main_pid, + propagate_directory, + "/run/systemd/incoming/", + src, dst, + flags); + if (r < 0) + log_unit_warning_errno( + u, + r, + "Failed to mount '%s' on '%s' in unit's namespace: %m", + src, + dst); + else + log_unit_debug(u, "Mounted '%s' on '%s' in unit's namespace", src, dst); + _exit(r < 0 ? 
EXIT_FAILURE : EXIT_SUCCESS); + } + + r = unit_watch_pidref(u, &worker, /* exclusive= */ true); + if (r < 0) { + sd_bus_error_set_errnof( + error, + r, + "Live mounting '%s' on '%s' for unit '%s' failed: failed to watch worker process", + src, + dst, + u->id); + goto fail; + } + + s->mount_request = sd_bus_message_ref(message); + s->control_pid = TAKE_PIDREF(worker); + service_set_state(s, SERVICE_MOUNTING); + return 0; + +fail: + s->live_mount_result = SERVICE_FAILURE_RESOURCES; + s->timer_event_source = sd_event_source_disable_unref(s->timer_event_source); + return r; +} + +static int service_can_live_mount(const Unit *u, sd_bus_error *error) { + assert(u); + + /* Ensure that the unit runs in a private mount namespace */ + if (!exec_needs_mount_namespace(unit_get_exec_context(u), /* params= */ NULL, unit_get_exec_runtime(u))) { + log_unit_debug(u, "Unit not running in private mount namespace, cannot live mount"); + return sd_bus_error_setf( + error, + SD_BUS_ERROR_INVALID_ARGS, + "Live mounting for unit '%s' cannot be scheduled: unit not running in private mount namespace", + u->id); + } + + return 0; +} + static const char* service_finished_job(Unit *u, JobType t, JobResult result) { Service *s = ASSERT_PTR(SERVICE(u)); @@ -5265,6 +5493,9 @@ const UnitVTable service_vtable = { .clean = service_clean, .can_clean = service_can_clean, + .live_mount = service_live_mount, + .can_live_mount = service_can_live_mount, + .freezer_action = unit_cgroup_freezer_action, .serialize = service_serialize, diff --git a/src/core/service.h b/src/core/service.h index 4d67174756f..6a0c4929922 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -188,6 +188,7 @@ struct Service { ServiceResult result; ServiceResult reload_result; ServiceResult clean_result; + ServiceResult live_mount_result; bool main_pid_known:1; bool main_pid_alien:1; @@ -232,6 +233,9 @@ struct Service { int reload_signal; usec_t reload_begin_usec; + + /* The D-Bus request, we will reply once the operation is 
finished, so that callers can block */ + sd_bus_message *mount_request; }; static inline usec_t service_timeout_abort_usec(Service *s) { diff --git a/src/core/socket.c b/src/core/socket.c index a1553bcc68b..0701780f038 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1568,7 +1568,7 @@ static int socket_address_listen_in_cgroup( if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0) return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m"); - r = unit_fork_helper_process(UNIT(s), "(sd-listen)", &pid); + r = unit_fork_helper_process(UNIT(s), "(sd-listen)", /* into_cgroup= */ true, &pid); if (r < 0) return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m"); if (r == 0) { @@ -1989,7 +1989,7 @@ static int socket_chown(Socket *s, PidRef *ret_pid) { /* We have to resolve the user names out-of-process, hence * let's fork here. It's messy, but well, what can we do? */ - r = unit_fork_helper_process(UNIT(s), "(sd-chown)", &pid); + r = unit_fork_helper_process(UNIT(s), "(sd-chown)", /* into_cgroup= */ true, &pid); if (r < 0) return r; if (r == 0) { @@ -3013,7 +3013,7 @@ static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) { if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0) return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m"); - r = unit_fork_helper_process(UNIT(s), "(sd-accept)", &pid); + r = unit_fork_helper_process(UNIT(s), "(sd-accept)", /* into_cgroup= */ true, &pid); if (r < 0) return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m"); if (r == 0) { diff --git a/src/core/unit.c b/src/core/unit.c index 66203f27942..4468733a636 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2561,7 +2561,7 @@ static bool unit_process_job(Job *j, UnitActiveState ns, bool reload_success) { if (j->state == JOB_RUNNING) { if (ns == UNIT_ACTIVE) job_finish_and_invalidate(j, reload_success ? 
JOB_DONE : JOB_FAILED, true, false); - else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING)) { + else if (!IN_SET(ns, UNIT_ACTIVATING, UNIT_RELOADING, UNIT_REFRESHING)) { unexpected = true; if (UNIT_IS_INACTIVE_OR_FAILED(ns)) @@ -5412,21 +5412,25 @@ int unit_set_exec_params(Unit *u, ExecParameters *p) { return 0; } -int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) { +int unit_fork_helper_process(Unit *u, const char *name, bool into_cgroup, PidRef *ret) { + CGroupRuntime *crt = NULL; pid_t pid; int r; assert(u); assert(ret); - /* Forks off a helper process and makes sure it is a member of the unit's cgroup. Returns == 0 in the child, - * and > 0 in the parent. The pid parameter is always filled in with the child's PID. */ + /* Forks off a helper process and makes sure it is a member of the unit's cgroup, if configured to + * do so. Returns == 0 in the child, and > 0 in the parent. The pid parameter is always filled in + * with the child's PID. */ - (void) unit_realize_cgroup(u); + if (into_cgroup) { + (void) unit_realize_cgroup(u); - CGroupRuntime *crt = unit_setup_cgroup_runtime(u); - if (!crt) - return -ENOMEM; + crt = unit_setup_cgroup_runtime(u); + if (!crt) + return -ENOMEM; + } r = safe_fork(name, FORK_REOPEN_LOG|FORK_DEATHSIG_SIGTERM, &pid); if (r < 0) @@ -5450,7 +5454,7 @@ int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret) { (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE); (void) ignore_signals(SIGPIPE); - if (crt->cgroup_path) { + if (crt && crt->cgroup_path) { r = cg_attach_everywhere(u->manager->cgroup_supported, crt->cgroup_path, 0); if (r < 0) { log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", empty_to_root(crt->cgroup_path)); @@ -5468,7 +5472,7 @@ int unit_fork_and_watch_rm_rf(Unit *u, char **paths, PidRef *ret_pid) { assert(u); assert(ret_pid); - r = unit_fork_helper_process(u, "(sd-rmrf)", &pid); + r = unit_fork_helper_process(u, "(sd-rmrf)", /* into_cgroup= */ true, &pid); if (r < 
0) return r; if (r == 0) { @@ -6375,6 +6379,82 @@ Condition *unit_find_failed_condition(Unit *u) { return failed_trigger && !has_succeeded_trigger ? failed_trigger : NULL; } +int unit_can_live_mount(const Unit *u, sd_bus_error *error) { + assert(u); + + if (!UNIT_VTABLE(u)->live_mount) { + log_unit_debug(u, "Live mounting not supported for unit type '%s'", unit_type_to_string(u->type)); + return sd_bus_error_setf( + error, + SD_BUS_ERROR_INVALID_ARGS, + "Live mounting for unit '%s' cannot be scheduled: live mounting not supported for unit type '%s'", + u->id, + unit_type_to_string(u->type)); + } + + if (u->load_state != UNIT_LOADED) { + log_unit_debug(u, "Unit not loaded"); + return sd_bus_error_setf( + error, + BUS_ERROR_NO_SUCH_UNIT, + "Live mounting for unit '%s' cannot be scheduled: unit not loaded", + u->id); + } + + if (!UNIT_VTABLE(u)->can_live_mount) + return 0; + + return UNIT_VTABLE(u)->can_live_mount(u, error); +} + +int unit_live_mount( + Unit *u, + const char *src, + const char *dst, + sd_bus_message *message, + MountInNamespaceFlags flags, + const MountOptions *options, + sd_bus_error *error) { + + assert(u); + assert(UNIT_VTABLE(u)->live_mount); + + if (!UNIT_IS_ACTIVE_OR_RELOADING(unit_active_state(u))) { + log_unit_debug(u, "Unit not active"); + return sd_bus_error_setf( + error, + BUS_ERROR_UNIT_INACTIVE, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: unit not active", + src, + dst, + u->id); + } + + if (unit_active_state(u) == UNIT_REFRESHING) { + log_unit_debug(u, "Unit already live mounting"); + return sd_bus_error_setf( + error, + BUS_ERROR_UNIT_BUSY, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: another live mount in progress", + src, + dst, + u->id); + } + + if (u->job) { + log_unit_debug(u, "Unit already has a job in progress, cannot live mount"); + return sd_bus_error_setf( + error, + BUS_ERROR_UNIT_BUSY, + "Live mounting '%s' on '%s' for unit '%s' cannot be scheduled: another operation in progress", + 
src, + dst, + u->id); + } + + return UNIT_VTABLE(u)->live_mount(u, src, dst, message, flags, options, error); +} + static const char* const collect_mode_table[_COLLECT_MODE_MAX] = { [COLLECT_INACTIVE] = "inactive", [COLLECT_INACTIVE_OR_FAILED] = "inactive-or-failed", diff --git a/src/core/unit.h b/src/core/unit.h index ce713354385..8aac5f59992 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -22,6 +22,7 @@ typedef enum UnitMountDependencyType { #include "emergency-action.h" #include "install.h" #include "list.h" +#include "mount-util.h" #include "pidref.h" #include "unit-file.h" @@ -45,11 +46,11 @@ typedef enum CollectMode { } CollectMode; static inline bool UNIT_IS_ACTIVE_OR_RELOADING(UnitActiveState t) { - return IN_SET(t, UNIT_ACTIVE, UNIT_RELOADING); + return IN_SET(t, UNIT_ACTIVE, UNIT_RELOADING, UNIT_REFRESHING); } static inline bool UNIT_IS_ACTIVE_OR_ACTIVATING(UnitActiveState t) { - return IN_SET(t, UNIT_ACTIVE, UNIT_ACTIVATING, UNIT_RELOADING); + return IN_SET(t, UNIT_ACTIVE, UNIT_ACTIVATING, UNIT_RELOADING, UNIT_REFRESHING); } static inline bool UNIT_IS_INACTIVE_OR_DEACTIVATING(UnitActiveState t) { @@ -584,6 +585,10 @@ typedef struct UnitVTable { bool (*can_reload)(Unit *u); + /* Add a bind/image mount into the unit namespace while it is running. */ + int (*live_mount)(Unit *u, const char *src, const char *dst, sd_bus_message *message, MountInNamespaceFlags flags, const MountOptions *options, sd_bus_error *error); + int (*can_live_mount)(const Unit *u, sd_bus_error *error); + /* Serialize state and file descriptors that should be carried over into the new * instance after reexecution. 
*/ int (*serialize)(Unit *u, FILE *f, FDSet *fds); @@ -980,7 +985,7 @@ int unit_acquire_invocation_id(Unit *u); int unit_set_exec_params(Unit *s, ExecParameters *p); -int unit_fork_helper_process(Unit *u, const char *name, PidRef *ret); +int unit_fork_helper_process(Unit *u, const char *name, bool into_cgroup, PidRef *ret); int unit_fork_and_watch_rm_rf(Unit *u, char **paths, PidRef *ret); void unit_remove_dependencies(Unit *u, UnitDependencyMask mask); @@ -1041,6 +1046,9 @@ void unit_next_freezer_state(Unit *u, FreezerAction action, FreezerState *ret_ne void unit_set_freezer_state(Unit *u, FreezerState state); void unit_freezer_complete(Unit *u, FreezerState kernel_state); +int unit_can_live_mount(const Unit *u, sd_bus_error *error); +int unit_live_mount(Unit *u, const char *src, const char *dst, sd_bus_message *message, MountInNamespaceFlags flags, const MountOptions *options, sd_bus_error *error); + Condition *unit_find_failed_condition(Unit *u); int unit_arm_timer(Unit *u, sd_event_source **source, bool relative, usec_t usec, sd_event_time_handler_t handler); diff --git a/src/machine/machine-dbus.c b/src/machine/machine-dbus.c index d4c6f1bfe39..366be8fbdeb 100644 --- a/src/machine/machine-dbus.c +++ b/src/machine/machine-dbus.c @@ -844,6 +844,7 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu int read_only, make_file_or_directory; const char *dest, *src, *propagate_directory; Machine *m = ASSERT_PTR(userdata); + MountInNamespaceFlags flags = 0; uid_t uid; int r; @@ -889,14 +890,18 @@ int bus_machine_method_bind_mount(sd_bus_message *message, void *userdata, sd_bu if (uid != 0) return sd_bus_error_set(error, SD_BUS_ERROR_NOT_SUPPORTED, "Can't bind mount on container with user namespacing applied."); + if (read_only) + flags |= MOUNT_IN_NAMESPACE_READ_ONLY; + if (make_file_or_directory) + flags |= MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY; + propagate_directory = strjoina("/run/systemd/nspawn/propagate/", m->name); r = 
bind_mount_in_namespace( &m->leader, propagate_directory, "/run/host/incoming/", src, dest, - read_only, - make_file_or_directory); + flags); if (r < 0) return sd_bus_error_set_errnof(error, r, "Failed to mount %s on %s in machine's namespace: %m", src, dest); diff --git a/src/shared/bus-wait-for-units.c b/src/shared/bus-wait-for-units.c index 6ccf822064f..f16fe51e6e1 100644 --- a/src/shared/bus-wait-for-units.c +++ b/src/shared/bus-wait-for-units.c @@ -24,6 +24,7 @@ typedef struct WaitForItem { char *active_state; uint32_t job_id; char *clean_result; + char *live_mount_result; } WaitForItem; typedef struct BusWaitForUnits { @@ -67,6 +68,7 @@ static WaitForItem *wait_for_item_free(WaitForItem *item) { free(item->bus_path); free(item->active_state); free(item->clean_result); + free(item->live_mount_result); return mfree(item); } @@ -178,6 +180,9 @@ static void wait_for_item_check_ready(WaitForItem *item) { if (item->clean_result && !streq(item->clean_result, "success")) d->has_failed = true; + if (item->live_mount_result && !streq(item->live_mount_result, "success")) + d->has_failed = true; + if (!item->active_state || streq(item->active_state, "maintenance")) return; } @@ -214,9 +219,10 @@ static int property_map_job_id( static int wait_for_item_parse_properties(WaitForItem *item, sd_bus_message *m) { static const struct bus_properties_map map[] = { - { "ActiveState", "s", NULL, offsetof(WaitForItem, active_state) }, - { "Job", "(uo)", property_map_job_id, offsetof(WaitForItem, job_id) }, - { "CleanResult", "s", NULL, offsetof(WaitForItem, clean_result) }, + { "ActiveState", "s", NULL, offsetof(WaitForItem, active_state) }, + { "Job", "(uo)", property_map_job_id, offsetof(WaitForItem, job_id) }, + { "CleanResult", "s", NULL, offsetof(WaitForItem, clean_result) }, + { "LiveMountResult", "s", NULL, offsetof(WaitForItem, live_mount_result) }, {} }; diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index aa0b9b40ecf..3992afe8c7d 100644 --- 
a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -854,11 +854,9 @@ static int mount_in_namespace_legacy( int pidns_fd, int mntns_fd, int root_fd, - bool read_only, - bool make_file_or_directory, + MountInNamespaceFlags flags, const MountOptions *options, - const ImagePolicy *image_policy, - bool is_image) { + const ImagePolicy *image_policy) { _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR; char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p; @@ -877,7 +875,7 @@ static int mount_in_namespace_legacy( assert(pidns_fd >= 0); assert(mntns_fd >= 0); assert(root_fd >= 0); - assert(!options || is_image); + assert(!options || (flags & MOUNT_IN_NAMESPACE_IS_IMAGE)); p = strjoina(propagate_path, "/"); r = laccess(p, F_OK); @@ -910,7 +908,7 @@ static int mount_in_namespace_legacy( /* Second, we mount the source file or directory to a directory inside of our MS_SLAVE playground. */ mount_tmp = strjoina(mount_slave, "/mount"); - if (is_image) + if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) r = mkdir_p(mount_tmp, 0700); else r = make_mount_point_inode_from_stat(chased_src_st, mount_tmp, 0700); @@ -921,7 +919,7 @@ static int mount_in_namespace_legacy( mount_tmp_created = true; - if (is_image) + if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) r = verity_dissect_and_mount( chased_src_fd, chased_src_path, @@ -943,7 +941,7 @@ static int mount_in_namespace_legacy( mount_tmp_mounted = true; /* Third, we remount the new bind mount read-only if requested. */ - if (read_only) { + if (flags & MOUNT_IN_NAMESPACE_READ_ONLY) { r = mount_nofollow_verbose(LOG_DEBUG, NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); if (r < 0) goto finish; @@ -953,7 +951,7 @@ static int mount_in_namespace_legacy( * right-away. */ mount_outside = strjoina(propagate_path, "/XXXXXX"); - if (is_image || S_ISDIR(chased_src_st->st_mode)) + if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode)) r = mkdtemp(mount_outside) ? 
0 : -errno; else { r = mkostemp_safe(mount_outside); @@ -973,7 +971,7 @@ static int mount_in_namespace_legacy( mount_outside_mounted = true; mount_tmp_mounted = false; - if (is_image || S_ISDIR(chased_src_st->st_mode)) + if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_tmp); else (void) unlink(mount_tmp); @@ -999,8 +997,8 @@ static int mount_in_namespace_legacy( errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); - if (make_file_or_directory) { - if (!is_image) { + if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) { + if (!(flags & MOUNT_IN_NAMESPACE_IS_IMAGE)) { (void) mkdir_parents(dest, 0755); (void) make_mount_point_inode_from_stat(chased_src_st, dest, 0700); } else @@ -1052,7 +1050,7 @@ finish: if (mount_outside_mounted) (void) umount_verbose(LOG_DEBUG, mount_outside, UMOUNT_NOFOLLOW); if (mount_outside_created) { - if (is_image || S_ISDIR(chased_src_st->st_mode)) + if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_outside); else (void) unlink(mount_outside); @@ -1061,7 +1059,7 @@ finish: if (mount_tmp_mounted) (void) umount_verbose(LOG_DEBUG, mount_tmp, UMOUNT_NOFOLLOW); if (mount_tmp_created) { - if (is_image || S_ISDIR(chased_src_st->st_mode)) + if ((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || S_ISDIR(chased_src_st->st_mode)) (void) rmdir(mount_tmp); else (void) unlink(mount_tmp); @@ -1081,9 +1079,7 @@ static int mount_in_namespace( const char *incoming_path, const char *src, const char *dest, - bool read_only, - bool make_file_or_directory, - bool is_image, + MountInNamespaceFlags flags, const MountOptions *options, const ImagePolicy *image_policy) { @@ -1096,7 +1092,7 @@ static int mount_in_namespace( assert(incoming_path); assert(src); assert(dest); - assert(is_image || (!options && !image_policy)); + assert((flags & MOUNT_IN_NAMESPACE_IS_IMAGE) || (!options && !image_policy)); if (!pidref_is_set(target)) return -ESRCH; @@ -1133,18 +1129,16 @@ static int 
mount_in_namespace( pidns_fd, mntns_fd, root_fd, - read_only, - make_file_or_directory, + flags, options, - image_policy, - is_image); + image_policy); _cleanup_(dissected_image_unrefp) DissectedImage *img = NULL; _cleanup_close_ int new_mount_fd = -EBADF; _cleanup_close_pair_ int errno_pipe_fd[2] = EBADF_PAIR; pid_t child; - if (is_image) { + if (flags & MOUNT_IN_NAMESPACE_IS_IMAGE) { r = verity_dissect_and_mount( chased_src_fd, chased_src_path, @@ -1173,7 +1167,7 @@ static int mount_in_namespace( "Failed to open mount source '%s': %m", chased_src_path); - if (read_only && mount_setattr(new_mount_fd, "", AT_EMPTY_PATH, + if ((flags & MOUNT_IN_NAMESPACE_READ_ONLY) && mount_setattr(new_mount_fd, "", AT_EMPTY_PATH, &(struct mount_attr) { .attr_set = MOUNT_ATTR_RDONLY, }, MOUNT_ATTR_SIZE_VER0) < 0) @@ -1201,7 +1195,7 @@ static int mount_in_namespace( if (r == 0) { errno_pipe_fd[0] = safe_close(errno_pipe_fd[0]); - if (make_file_or_directory) + if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) (void) mkdir_parents(dest, 0755); if (img) { @@ -1209,10 +1203,10 @@ static int mount_in_namespace( DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE | DISSECT_IMAGE_ALLOW_USERSPACE_VERITY; - if (make_file_or_directory) + if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) f |= DISSECT_IMAGE_MKDIR; - if (read_only) + if (flags & MOUNT_IN_NAMESPACE_READ_ONLY) f |= DISSECT_IMAGE_READ_ONLY; r = dissected_image_mount( @@ -1223,7 +1217,7 @@ static int mount_in_namespace( /* userns_fd= */ -EBADF, f); } else { - if (make_file_or_directory) + if (flags & MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY) (void) make_mount_point_inode_from_stat(&st, dest, 0700); r = mount_exchange_graceful(new_mount_fd, dest, /* mount_beneath= */ true); @@ -1259,17 +1253,14 @@ int bind_mount_in_namespace( const char *incoming_path, const char *src, const char *dest, - bool read_only, - bool make_file_or_directory) { + MountInNamespaceFlags flags) { return mount_in_namespace(target, propagate_path, incoming_path, 
src, dest, - read_only, - make_file_or_directory, - /* is_image = */ false, + flags & ~MOUNT_IN_NAMESPACE_IS_IMAGE, /* options = */ NULL, /* image_policy = */ NULL); } @@ -1280,8 +1271,7 @@ int mount_image_in_namespace( const char *incoming_path, const char *src, const char *dest, - bool read_only, - bool make_file_or_directory, + MountInNamespaceFlags flags, const MountOptions *options, const ImagePolicy *image_policy) { @@ -1290,9 +1280,7 @@ int mount_image_in_namespace( incoming_path, src, dest, - read_only, - make_file_or_directory, - /* is_image = */ true, + flags | MOUNT_IN_NAMESPACE_IS_IMAGE, options, image_policy); } diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 8014eb48cff..069378cf4d2 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -106,22 +106,26 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_unlink_and_free); int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath); +typedef enum MountInNamespaceFlags { + MOUNT_IN_NAMESPACE_READ_ONLY = 1 << 0, + MOUNT_IN_NAMESPACE_MAKE_FILE_OR_DIRECTORY = 1 << 1, + MOUNT_IN_NAMESPACE_IS_IMAGE = 1 << 2, +} MountInNamespaceFlags; + int bind_mount_in_namespace( const PidRef *target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, - bool read_only, - bool make_file_or_directory); + MountInNamespaceFlags flags); int mount_image_in_namespace( const PidRef *target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, - bool read_only, - bool make_file_or_directory, + MountInNamespaceFlags flags, const MountOptions *options, const ImagePolicy *image_policy); diff --git a/src/systemctl/systemctl-is-active.c b/src/systemctl/systemctl-is-active.c index 596320a8c61..ae834f1071a 100644 --- a/src/systemctl/systemctl-is-active.c +++ b/src/systemctl/systemctl-is-active.c @@ -57,6 +57,7 @@ int verb_is_active(int argc, char *argv[], void *userdata) { static const UnitActiveState states[] = 
{ UNIT_ACTIVE, UNIT_RELOADING, + UNIT_REFRESHING, }; /* According to LSB: 3, "program is not running" */ diff --git a/src/systemctl/systemctl-list-dependencies.c b/src/systemctl/systemctl-list-dependencies.c index 3df9b7abdff..8c1ebf9d0f7 100644 --- a/src/systemctl/systemctl-list-dependencies.c +++ b/src/systemctl/systemctl-list-dependencies.c @@ -22,6 +22,7 @@ static int list_dependencies_print(const char *name, UnitActiveState state, int switch (state) { case UNIT_ACTIVE: case UNIT_RELOADING: + case UNIT_REFRESHING: case UNIT_ACTIVATING: on = ansi_highlight_green(); break; diff --git a/src/systemctl/systemctl-list-units.c b/src/systemctl/systemctl-list-units.c index b4ccc8ebd76..a2f3074358e 100644 --- a/src/systemctl/systemctl-list-units.c +++ b/src/systemctl/systemctl-list-units.c @@ -149,7 +149,7 @@ static int output_units_list(const UnitInfo *unit_infos, size_t c) { /* Here override any load_state highlighting */ on_circle = ansi_highlight_red(); circle = true; - } else if (STR_IN_SET(u->active_state, "reloading", "activating", "maintenance", "deactivating")) { + } else if (STR_IN_SET(u->active_state, "reloading", "activating", "maintenance", "refreshing", "deactivating")) { on_sub = on_active = ansi_highlight(); if (!circle) { /* Here we let load_state highlighting win */ diff --git a/src/systemctl/systemctl-show.c b/src/systemctl/systemctl-show.c index 50f30d85658..2f39bc2b12f 100644 --- a/src/systemctl/systemctl-show.c +++ b/src/systemctl/systemctl-show.c @@ -301,7 +301,7 @@ static void format_active_state(const char *active_state, const char **active_on if (streq_ptr(active_state, "failed")) { *active_on = ansi_highlight_red(); *active_off = ansi_normal(); - } else if (STRPTR_IN_SET(active_state, "active", "reloading")) { + } else if (STRPTR_IN_SET(active_state, "active", "reloading", "refreshing")) { *active_on = ansi_highlight_green(); *active_off = ansi_normal(); } else @@ -440,10 +440,10 @@ static void print_status_info( if (!isempty(i->result) && 
!streq(i->result, "success")) printf(" (Result: %s)", i->result); - timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading") ? i->active_enter_timestamp : - STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp : - STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp : - i->active_exit_timestamp; + timestamp = STRPTR_IN_SET(i->active_state, "active", "reloading", "refreshing") ? i->active_enter_timestamp : + STRPTR_IN_SET(i->active_state, "inactive", "failed") ? i->inactive_enter_timestamp : + STRPTR_IN_SET(i->active_state, "activating") ? i->inactive_exit_timestamp : + i->active_exit_timestamp; if (timestamp_is_set(timestamp)) { printf(" since %s; %s\n", @@ -2199,7 +2199,7 @@ static int show_one( if (show_mode == SYSTEMCTL_SHOW_STATUS) { print_status_info(bus, &info, ellipsized); - if (info.active_state && !STR_IN_SET(info.active_state, "active", "reloading")) + if (info.active_state && !STR_IN_SET(info.active_state, "active", "reloading", "refreshing")) return EXIT_PROGRAM_NOT_RUNNING; return EXIT_PROGRAM_RUNNING_OR_SERVICE_OK; diff --git a/src/systemctl/systemctl-util.c b/src/systemctl/systemctl-util.c index f00b2d00229..848012de663 100644 --- a/src/systemctl/systemctl-util.c +++ b/src/systemctl/systemctl-util.c @@ -361,7 +361,7 @@ int get_active_triggering_units(sd_bus *bus, const char *unit, bool ignore_maske if (r < 0) return r; - if (!IN_SET(active_state, UNIT_ACTIVE, UNIT_RELOADING)) + if (!IN_SET(active_state, UNIT_ACTIVE, UNIT_RELOADING, UNIT_REFRESHING)) continue; r = strv_extend(&active, *i); diff --git a/test/units/TEST-23-UNIT-FILE.runtime-bind-paths.sh b/test/units/TEST-23-UNIT-FILE.runtime-bind-paths.sh index 3a78234cdc7..32adcf785f7 100755 --- a/test/units/TEST-23-UNIT-FILE.runtime-bind-paths.sh +++ b/test/units/TEST-23-UNIT-FILE.runtime-bind-paths.sh @@ -34,10 +34,14 @@ systemctl bind --mkdir TEST-23-UNIT-FILE-namespaced.service /run/TEST-23-UNIT-FI timeout 10 bash -xec 'while [[ 
"$(systemctl show -P SubState TEST-23-UNIT-FILE-namespaced.service)" == running ]]; do sleep .5; done' systemctl is-active TEST-23-UNIT-FILE-namespaced.service +test "$(busctl --json=short get-property org.freedesktop.systemd1 /org/freedesktop/systemd1/unit/TEST_2d23_2dUNIT_2dFILE_2dnamespaced_2eservice org.freedesktop.systemd1.Unit CanLiveMount)" = "{\"type\":\"b\",\"data\":true}" + # Now test that systemctl bind fails when attempted on a non-namespaced unit systemctl start TEST-23-UNIT-FILE-non-namespaced.service (! systemctl bind --mkdir TEST-23-UNIT-FILE-non-namespaced.service /run/TEST-23-UNIT-FILE-marker-runtime /tmp/testfile-marker-runtime) +test "$(busctl --json=short get-property org.freedesktop.systemd1 /org/freedesktop/systemd1/unit/TEST_2d23_2dUNIT_2dFILE_2dnon_2dnamespaced_2eservice org.freedesktop.systemd1.Unit CanLiveMount)" = "{\"type\":\"b\",\"data\":false}" + timeout 10 bash -xec 'while [[ "$(systemctl show -P SubState TEST-23-UNIT-FILE-non-namespaced.service)" == running ]]; do sleep .5; done' (! systemctl is-active TEST-23-UNIT-FILE-non-namespaced.service)