From 280b3781bd03a728e1e2f78ad85ac3df4b4b010b Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 22 Feb 2021 18:45:36 +0000 Subject: [PATCH 1/3] sysext: split parsing SYSTEMD_SYSEXT_HIERARCHIES in a common helper --- src/shared/extension-release.c | 15 +++++++++++++++ src/shared/extension-release.h | 3 +++ src/sysext/sysext.c | 8 +------- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/shared/extension-release.c b/src/shared/extension-release.c index 5eecf5a3236..5676e2c0630 100644 --- a/src/shared/extension-release.c +++ b/src/shared/extension-release.c @@ -77,3 +77,18 @@ int extension_release_validate( log_debug("Version info of extension '%s' matches host.", name); return 1; } + +int parse_env_extension_hierarchies(char ***ret_hierarchies) { + int r; + + r = getenv_path_list("SYSTEMD_SYSEXT_HIERARCHIES", ret_hierarchies); + if (r < 0) + return log_debug_errno(r, "Failed to parse SYSTEMD_SYSEXT_HIERARCHIES environment variable : %m"); + if (!*ret_hierarchies) { + *ret_hierarchies = strv_new("/usr", "/opt"); + if (!*ret_hierarchies) + return -ENOMEM; + } + + return 0; +} diff --git a/src/shared/extension-release.h b/src/shared/extension-release.h index 5c77010b0e1..d026a9b225b 100644 --- a/src/shared/extension-release.h +++ b/src/shared/extension-release.h @@ -10,3 +10,6 @@ int extension_release_validate( const char *host_os_release_version_id, const char *host_os_release_sysext_level, char **extension_release); + +/* Parse SYSTEMD_SYSEXT_HIERARCHIES and if not set, return "/usr /opt" */ +int parse_env_extension_hierarchies(char ***ret_hierarchies); diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c index 60f4dee3530..f68eaa12dba 100644 --- a/src/sysext/sysext.c +++ b/src/sysext/sysext.c @@ -982,16 +982,10 @@ static int run(int argc, char *argv[]) { /* For debugging purposes it might make sense to do this for other hierarchies than /usr/ and * /opt/, but let's make that a hacker/debugging feature, i.e. 
env var instead of cmdline * switch. */ - r = getenv_path_list("SYSTEMD_SYSEXT_HIERARCHIES", &arg_hierarchies); + r = parse_env_extension_hierarchies(&arg_hierarchies); if (r < 0) return log_error_errno(r, "Failed to parse $SYSTEMD_SYSEXT_HIERARCHIES environment variable: %m"); - if (!arg_hierarchies) { - arg_hierarchies = strv_new("/usr", "/opt"); - if (!arg_hierarchies) - return log_oom(); - } - return sysext_main(argc, argv); } From 82fb2da21347b750b3de53cde588ee1189f7acb7 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 22 Feb 2021 12:12:54 +0000 Subject: [PATCH 2/3] core/namespace: refactor applying mounts in a separate function The setup_namespace code to apply mounts is a big if block that keeps growing, so refactor it in a separate function. --- src/core/namespace.c | 204 +++++++++++++++++++++++-------------------- 1 file changed, 111 insertions(+), 93 deletions(-) diff --git a/src/core/namespace.c b/src/core/namespace.c index 285a87dfcdd..151fc913976 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -1003,7 +1003,7 @@ static int mount_run(const MountEntry *m) { return mount_tmpfs(m); } -static int mount_images(const MountEntry *m) { +static int mount_image(const MountEntry *m) { int r; assert(m); @@ -1049,7 +1049,7 @@ static int follow_symlink( return 0; } -static int apply_mount( +static int apply_one_mount( const char *root_directory, MountEntry *m, const NamespaceInfo *ns_info) { @@ -1173,7 +1173,7 @@ static int apply_mount( return mount_run(m); case MOUNT_IMAGES: - return mount_images(m); + return mount_image(m); default: assert_not_reached("Unknown mode"); @@ -1378,6 +1378,110 @@ static void normalize_mounts(const char *root_directory, MountEntry *mounts, siz drop_nop(mounts, n_mounts); } +static int apply_mounts( + const char *root, + const NamespaceInfo *ns_info, + MountEntry *mounts, + size_t *n_mounts, + char **error_path) { + + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + _cleanup_free_ char **deny_list = NULL; + 
size_t j; + int r; + + if (n_mounts == 0) /* Shortcut: nothing to do */ + return 0; + + assert(root); + assert(mounts); + assert(n_mounts); + + /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of + * /proc. For example, this is the case with the option: 'InaccessiblePaths=/proc'. */ + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) { + if (error_path) + *error_path = strdup("/proc/self/mountinfo"); + return log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); + } + + /* First round, establish all mounts we need */ + for (;;) { + bool again = false; + + for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { + + if (m->applied) + continue; + + r = follow_symlink(root, m); + if (r < 0) { + if (error_path && mount_entry_path(m)) + *error_path = strdup(mount_entry_path(m)); + return r; + } + if (r == 0) { + /* We hit a symlinked mount point. The entry got rewritten and might + * point to a very different place now. Let's normalize the changed + * list, and start from the beginning. After all to mount the entry + * at the new location we might need some other mounts first */ + again = true; + break; + } + + r = apply_one_mount(root, m, ns_info); + if (r < 0) { + if (error_path && mount_entry_path(m)) + *error_path = strdup(mount_entry_path(m)); + return r; + } + + m->applied = true; + } + + if (!again) + break; + + normalize_mounts(root, mounts, n_mounts); + } + + /* Create a deny list we can pass to bind_mount_recursive() */ + deny_list = new(char*, (*n_mounts)+1); + if (!deny_list) + return -ENOMEM; + for (j = 0; j < *n_mounts; j++) + deny_list[j] = (char*) mount_entry_path(mounts+j); + deny_list[j] = NULL; + + /* Second round, flip the ro bits if necessary. 
*/ + for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { + r = make_read_only(m, deny_list, proc_self_mountinfo); + if (r < 0) { + if (error_path && mount_entry_path(m)) + *error_path = strdup(mount_entry_path(m)); + return r; + } + } + + /* Third round, flip the noexec bits with a simplified deny list. */ + for (j = 0; j < *n_mounts; j++) + if (IN_SET((mounts+j)->mode, EXEC, NOEXEC)) + deny_list[j] = (char*) mount_entry_path(mounts+j); + deny_list[j] = NULL; + + for (MountEntry *m = mounts; m < mounts + *n_mounts; ++m) { + r = make_noexec(m, deny_list, proc_self_mountinfo); + if (r < 0) { + if (error_path && mount_entry_path(m)) + *error_path = strdup(mount_entry_path(m)); + return r; + } + } + + return 1; +} + static bool root_read_only( char **read_only_paths, ProtectSystem protect_system) { @@ -1894,96 +1998,10 @@ int setup_namespace( if (root_image || root_directory) (void) base_filesystem_create(root, UID_INVALID, GID_INVALID); - if (n_mounts > 0) { - _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; - _cleanup_free_ char **deny_list = NULL; - size_t j; - - /* Open /proc/self/mountinfo now as it may become unavailable if we mount anything on top of - * /proc. For example, this is the case with the option: 'InaccessiblePaths=/proc'. */ - proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); - if (!proc_self_mountinfo) { - r = log_debug_errno(errno, "Failed to open /proc/self/mountinfo: %m"); - if (error_path) - *error_path = strdup("/proc/self/mountinfo"); - goto finish; - } - - /* First round, establish all mounts we need */ - for (;;) { - bool again = false; - - for (m = mounts; m < mounts + n_mounts; ++m) { - - if (m->applied) - continue; - - r = follow_symlink(root, m); - if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); - goto finish; - } - if (r == 0) { - /* We hit a symlinked mount point. The entry got rewritten and might - * point to a very different place now. 
Let's normalize the changed - * list, and start from the beginning. After all to mount the entry - * at the new location we might need some other mounts first */ - again = true; - break; - } - - r = apply_mount(root, m, ns_info); - if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); - goto finish; - } - - m->applied = true; - } - - if (!again) - break; - - normalize_mounts(root, mounts, &n_mounts); - } - - /* Create a deny list we can pass to bind_mount_recursive() */ - deny_list = new(char*, n_mounts+1); - if (!deny_list) { - r = -ENOMEM; - goto finish; - } - for (j = 0; j < n_mounts; j++) - deny_list[j] = (char*) mount_entry_path(mounts+j); - deny_list[j] = NULL; - - /* Second round, flip the ro bits if necessary. */ - for (m = mounts; m < mounts + n_mounts; ++m) { - r = make_read_only(m, deny_list, proc_self_mountinfo); - if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); - goto finish; - } - } - - /* Third round, flip the noexec bits with a simplified deny list. 
*/ - for (m = mounts, j = 0; m < mounts + n_mounts; ++m) - if (IN_SET(m->mode, EXEC, NOEXEC)) - deny_list[j++] = (char*) mount_entry_path(m); - deny_list[j] = NULL; - - for (m = mounts; m < mounts + n_mounts; ++m) { - r = make_noexec(m, deny_list, proc_self_mountinfo); - if (r < 0) { - if (error_path && mount_entry_path(m)) - *error_path = strdup(mount_entry_path(m)); - goto finish; - } - } - } + /* Now make the magic happen */ + r = apply_mounts(root, ns_info, mounts, &n_mounts, error_path); + if (r < 0) + goto finish; /* MS_MOVE does not work on MS_SHARED so the remount MS_SHARED will be done later */ r = mount_move_root(root); From 93f597013a82298c5922f2f06de98be22b635e7b Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Mon, 22 Feb 2021 12:20:33 +0000 Subject: [PATCH 3/3] Add ExtensionImages directive to form overlays Add support for overlaying images for services on top of their root fs, using a read-only overlay. --- man/org.freedesktop.systemd1.xml | 83 +++------ man/systemd.exec.xml | 42 +++++ src/core/dbus-execute.c | 145 +++++++++++++++ src/core/execute.c | 19 ++ src/core/execute.h | 2 + src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/load-fragment.c | 142 ++++++++++++++ src/core/load-fragment.h | 1 + src/core/namespace.c | 196 +++++++++++++++++++- src/core/namespace.h | 12 +- src/shared/bus-unit-util.c | 104 +++++++++++ src/shared/dissect-image.c | 34 +++- src/shared/dissect-image.h | 2 +- src/shared/mount-util.c | 2 +- src/test/test-namespace.c | 2 + src/test/test-ns.c | 2 + test/TEST-50-DISSECT/test.sh | 3 + test/fuzz/fuzz-unit-file/directives.service | 1 + test/test-functions | 50 ++++- test/units/testsuite-50.sh | 21 +++ 20 files changed, 794 insertions(+), 70 deletions(-) diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 8c370ba9a45..21630478962 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2565,6 +2565,8 @@ node 
/org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s RootVerity = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly a(sba(ss)) ExtensionImages = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(ssba(ss)) MountImages = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly i OOMScoreAdjust = ...; @@ -3070,24 +3072,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { - - - - - - - - - - - - - - @@ -3656,6 +3644,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -3978,6 +3968,17 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { ControlGroup indicates the control group path the processes of this service unit are placed in. + + The following properties map 1:1 to corresponding settings in the unit file: + RootDirectory + RootImage + RootImageOptions + RootVerity + RootHash + RootHashSignature + MountImages + ExtensionImages + see systemd.exec(5) for their meaning. 
@@ -4325,6 +4326,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s RootVerity = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly a(sba(ss)) ExtensionImages = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(ssba(ss)) MountImages = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly i OOMScoreAdjust = ...; @@ -4858,24 +4861,10 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { - - - - - - - - - - - - - - @@ -5442,6 +5431,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2esocket { + + @@ -6024,6 +6015,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s RootVerity = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly a(sba(ss)) ExtensionImages = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(ssba(ss)) MountImages = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly i OOMScoreAdjust = ...; @@ -6485,24 +6478,10 @@ node /org/freedesktop/systemd1/unit/home_2emount { - - - - - - - - - - - - - - @@ -6987,6 +6966,8 @@ node /org/freedesktop/systemd1/unit/home_2emount { + + @@ -7690,6 +7671,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s RootVerity = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly a(sba(ss)) ExtensionImages = [...]; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly a(ssba(ss)) MountImages = [...]; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly i OOMScoreAdjust = ...; @@ -8137,24 +8120,10 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { - - - - - - - - - - - - - - @@ -8625,6 +8594,8 @@ node /org/freedesktop/systemd1/unit/dev_2dsda3_2eswap { + + diff --git 
a/man/systemd.exec.xml b/man/systemd.exec.xml index 1ebce6188e4..bac8f9cdff9 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -433,6 +433,48 @@ + + + ExtensionImages= + + This setting is similar to MountImages= in that it mounts a file + system hierarchy from a block device node or loopback file, but instead of providing a destination path, + an overlay will be set up. This option expects a whitespace-separated list of mount definitions. Each + definition consists of a source path, optionally followed by a colon and a list of mount options. + + A read-only OverlayFS will be set up on top of the /usr/ and + /opt/ hierarchies from the root. The order in which the images are listed + will determine the order in which the overlay is laid down: images specified first to last will result + in overlayfs layers bottom to top. + + Mount options may be defined as a single comma-separated list of options, in which case they + will be implicitly applied to the root partition on the image, or a series of colon-separated tuples + of partition name and mount options. Valid partition names and mount options are the same as for the + RootImageOptions= setting described above. + + Each mount definition may be prefixed with -, in which case it will be + ignored when its source path does not exist. The source argument is a path to a block device node or + regular file. If the source path contains a :, it needs to be escaped as + \:. The device node or file system image file needs to follow the same rules as + specified for RootImage=. Any mounts created with this option are specific to the + unit, and are not visible in the host's mount table. + + This setting may be used more than once; each usage appends to the unit's list of image + paths. If the empty string is assigned, the entire list of mount paths defined prior to this is + reset. 
+ + When DevicePolicy= is set to closed or + strict, or set to auto and DeviceAllow= is + set, then this setting adds /dev/loop-control with rw mode, + block-loop and block-blkext with rwm mode + to DeviceAllow=. See + systemd.resource-control5 + for the details about DevicePolicy= or DeviceAllow=. Also, see + PrivateDevices= below, as it may change the setting of + DevicePolicy=. + + + diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 1f0e27a1417..a4817ca6de7 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -996,6 +996,60 @@ static int property_get_mount_images( return sd_bus_message_close_container(reply); } +static int property_get_extension_images( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + int r; + + assert(bus); + assert(c); + assert(property); + assert(reply); + + r = sd_bus_message_open_container(reply, 'a', "(sba(ss))"); + if (r < 0) + return r; + + for (size_t i = 0; i < c->n_extension_images; i++) { + MountOptions *m; + + r = sd_bus_message_open_container(reply, SD_BUS_TYPE_STRUCT, "sba(ss)"); + if (r < 0) + return r; + r = sd_bus_message_append( + reply, "sb", + c->extension_images[i].source, + c->extension_images[i].ignore_enoent); + if (r < 0) + return r; + r = sd_bus_message_open_container(reply, 'a', "(ss)"); + if (r < 0) + return r; + LIST_FOREACH(mount_options, m, c->extension_images[i].mount_options) { + r = sd_bus_message_append(reply, "(ss)", + partition_designator_to_string(m->partition_designator), + m->options); + if (r < 0) + return r; + } + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + r = sd_bus_message_close_container(reply); + if (r < 0) + return r; + } + + return sd_bus_message_close_container(reply); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, 
offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1044,6 +1098,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("RootHashSignature", "ay", property_get_root_hash_sig, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootHashSignaturePath", "s", NULL, offsetof(ExecContext, root_hash_sig_path), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("RootVerity", "s", NULL, offsetof(ExecContext, root_verity), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ExtensionImages", "a(sba(ss))", property_get_extension_images, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("MountImages", "a(ssba(ss))", property_get_mount_images, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("OOMScoreAdjust", "i", property_get_oom_score_adjust, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("CoredumpFilter", "t", property_get_coredump_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST), @@ -3356,6 +3411,7 @@ int bus_exec_context_set_transient_property( .destination = destination, .mount_options = options, .ignore_enoent = permissive, + .type = MOUNT_IMAGE_DISCRETE, }); if (r < 0) return r; @@ -3389,6 +3445,95 @@ int bus_exec_context_set_transient_property( mount_images = mount_image_free_many(mount_images, &n_mount_images); + return 1; + } else if (streq(name, "ExtensionImages")) { + _cleanup_free_ char *format_str = NULL; + MountImage *extension_images = NULL; + size_t n_extension_images = 0; + + r = sd_bus_message_enter_container(message, 'a', "(sba(ss))"); + if (r < 0) + return r; + + for (;;) { + _cleanup_(mount_options_free_allp) MountOptions *options = NULL; + _cleanup_free_ char *source_escaped = NULL; + char *source, *tuple; + int permissive; + + r = sd_bus_message_enter_container(message, 'r', "sba(ss)"); + if (r < 0) + return r; + + r = sd_bus_message_read(message, "sb", &source, &permissive); + if (r <= 0) + break; + + if (!path_is_absolute(source)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not absolute.", source); + 
if (!path_is_normalized(source)) + return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Source path %s is not normalized.", source); + + /* Need to store them in the unit with the escapes, so that they can be parsed again */ + source_escaped = shell_escape(source, ":"); + if (!source_escaped) + return -ENOMEM; + + tuple = strjoin(format_str, + format_str ? " " : "", + permissive ? "-" : "", + source_escaped); + if (!tuple) + return -ENOMEM; + free_and_replace(format_str, tuple); + + r = bus_read_mount_options(message, error, &options, &format_str, ":"); + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + r = mount_image_add(&extension_images, &n_extension_images, + &(MountImage) { + .source = source, + .mount_options = options, + .ignore_enoent = permissive, + .type = MOUNT_IMAGE_EXTENSION, + }); + if (r < 0) + return r; + } + if (r < 0) + return r; + + r = sd_bus_message_exit_container(message); + if (r < 0) + return r; + + if (!UNIT_WRITE_FLAGS_NOOP(flags)) { + if (n_extension_images == 0) { + c->extension_images = mount_image_free_many(c->extension_images, &c->n_extension_images); + + unit_write_settingf(u, flags, name, "%s=", name); + } else { + for (size_t i = 0; i < n_extension_images; ++i) { + r = mount_image_add(&c->extension_images, &c->n_extension_images, &extension_images[i]); + if (r < 0) + return r; + } + + unit_write_settingf(u, flags|UNIT_ESCAPE_C|UNIT_ESCAPE_SPECIFIERS, + name, + "%s=%s", + name, + format_str); + } + } + + extension_images = mount_image_free_many(extension_images, &n_extension_images); + return 1; } diff --git a/src/core/execute.c b/src/core/execute.c index d27adbbba54..60d107477b1 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -2018,6 +2018,9 @@ bool exec_needs_mount_namespace( if (context->n_mount_images > 0) return true; + if (context->n_extension_images > 0) + return true; + if (!IN_SET(context->mount_flags, 0, MS_SHARED)) return true; @@ -3230,6 +3233,8 @@ 
static int apply_mount_namespace( context->root_hash, context->root_hash_size, context->root_hash_path, context->root_hash_sig, context->root_hash_sig_size, context->root_hash_sig_path, context->root_verity, + context->extension_images, + context->n_extension_images, propagate_dir, incoming_dir, root_dir || root_image ? params->notify_socket : NULL, @@ -4816,6 +4821,7 @@ void exec_context_done(ExecContext *c) { c->root_hash_sig_size = 0; c->root_hash_sig_path = mfree(c->root_hash_sig_path); c->root_verity = mfree(c->root_verity); + c->extension_images = mount_image_free_many(c->extension_images, &c->n_extension_images); c->tty_path = mfree(c->tty_path); c->syslog_identifier = mfree(c->syslog_identifier); c->user = mfree(c->user); @@ -5658,6 +5664,19 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { strempty(o->options)); fprintf(f, "\n"); } + + for (size_t i = 0; i < c->n_extension_images; i++) { + MountOptions *o; + + fprintf(f, "%sExtensionImages: %s%s", prefix, + c->extension_images[i].ignore_enoent ? 
"-": "", + c->extension_images[i].source); + LIST_FOREACH(mount_options, o, c->extension_images[i].mount_options) + fprintf(f, ":%s:%s", + partition_designator_to_string(o->partition_designator), + strempty(o->options)); + fprintf(f, "\n"); + } } bool exec_context_maintains_privileges(const ExecContext *c) { diff --git a/src/core/execute.h b/src/core/execute.h index cf0c8b868b1..20e1799b46a 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -251,6 +251,8 @@ struct ExecContext { size_t n_temporary_filesystems; MountImage *mount_images; size_t n_mount_images; + MountImage *extension_images; + size_t n_extension_images; uint64_t capability_bounding_set; uint64_t capability_ambient_set; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 6ed6b07db21..6a11ef0d9d7 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -28,6 +28,7 @@ $1.RootImageOptions, config_parse_root_image_options, $1.RootHash, config_parse_exec_root_hash, 0, offsetof($1, exec_context) $1.RootHashSignature, config_parse_exec_root_hash_sig, 0, offsetof($1, exec_context) $1.RootVerity, config_parse_unit_path_printf, true, offsetof($1, exec_context.root_verity) +$1.ExtensionImages, config_parse_extension_images, 0, offsetof($1, exec_context) $1.MountImages, config_parse_mount_images, 0, offsetof($1, exec_context) $1.User, config_parse_user_group_compat, 0, offsetof($1, exec_context.user) $1.Group, config_parse_user_group_compat, 0, offsetof($1, exec_context.group) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 3e7081bf609..c27814ad387 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -5117,6 +5117,148 @@ int config_parse_mount_images( .destination = dresolved, .mount_options = options, .ignore_enoent = permissive, + .type = MOUNT_IMAGE_DISCRETE, + }); + if (r < 0) + return log_oom(); + } +} + +int config_parse_extension_images( + const char *unit, + const char 
*filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + const Unit *u = userdata; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + c->extension_images = mount_image_free_many(c->extension_images, &c->n_extension_images); + return 0; + } + + for (const char *p = rvalue;;) { + _cleanup_free_ char *source = NULL, *tuple = NULL, *sresolved = NULL; + _cleanup_(mount_options_free_allp) MountOptions *options = NULL; + bool permissive = false; + const char *q = NULL; + char *s = NULL; + + r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Invalid syntax %s=%s, ignoring: %m", lvalue, rvalue); + return 0; + } + if (r == 0) + return 0; + + q = tuple; + r = extract_first_word(&q, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Invalid syntax in %s=, ignoring: %s", lvalue, tuple); + return 0; + } + if (r == 0) + continue; + + s = source; + if (s[0] == '-') { + permissive = true; + s++; + } + + r = unit_full_printf(u, s, &sresolved); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to resolve unit specifiers in \"%s\", ignoring: %m", s); + continue; + } + + r = path_simplify_and_warn(sresolved, PATH_CHECK_ABSOLUTE, unit, filename, line, lvalue); + if (r < 0) + continue; + + for (;;) { + _cleanup_free_ char *partition = NULL, *mount_options = NULL, *mount_options_resolved = NULL; + MountOptions *o = NULL; + PartitionDesignator partition_designator; + + r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, 
NULL); + if (r == -ENOMEM) + return log_oom(); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", q); + return 0; + } + if (r == 0) + break; + /* Single set of options, applying to the root partition/single filesystem */ + if (r == 1) { + r = unit_full_printf(u, partition, &mount_options_resolved); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", partition); + continue; + } + + o = new(MountOptions, 1); + if (!o) + return log_oom(); + *o = (MountOptions) { + .partition_designator = PARTITION_ROOT, + .options = TAKE_PTR(mount_options_resolved), + }; + LIST_APPEND(mount_options, options, o); + + break; + } + + partition_designator = partition_designator_from_string(partition); + if (partition_designator < 0) { + log_syntax(unit, LOG_WARNING, filename, line, 0, "Invalid partition name %s, ignoring", partition); + continue; + } + r = unit_full_printf(u, mount_options, &mount_options_resolved); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, "Failed to resolve unit specifiers in %s, ignoring: %m", mount_options); + continue; + } + + o = new(MountOptions, 1); + if (!o) + return log_oom(); + *o = (MountOptions) { + .partition_designator = partition_designator, + .options = TAKE_PTR(mount_options_resolved), + }; + LIST_APPEND(mount_options, options, o); + } + + r = mount_image_add(&c->extension_images, &c->n_extension_images, + &(MountImage) { + .source = sresolved, + .mount_options = options, + .ignore_enoent = permissive, + .type = MOUNT_IMAGE_EXTENSION, }); if (r < 0) return log_oom(); diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index e4a5cb79869..b8a6d5feadc 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -138,6 +138,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_timeout_abort); CONFIG_PARSER_PROTOTYPE(config_parse_swap_priority); CONFIG_PARSER_PROTOTYPE(config_parse_mount_images); 
CONFIG_PARSER_PROTOTYPE(config_parse_socket_timestamping); +CONFIG_PARSER_PROTOTYPE(config_parse_extension_images); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/core/namespace.c b/src/core/namespace.c index 151fc913976..ed07db5c731 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -11,6 +11,9 @@ #include "alloc-util.h" #include "base-filesystem.h" #include "dev-setup.h" +#include "env-util.h" +#include "escape.h" +#include "extension-release.h" #include "fd-util.h" #include "format-util.h" #include "fs-util.h" @@ -24,6 +27,7 @@ #include "namespace-util.h" #include "namespace.h" #include "nulstr-util.h" +#include "os-util.h" #include "path-util.h" #include "selinux-util.h" #include "socket-util.h" @@ -41,6 +45,7 @@ typedef enum MountMode { /* This is ordered by priority! */ INACCESSIBLE, + OVERLAY_MOUNT, MOUNT_IMAGES, BIND_MOUNT, BIND_MOUNT_RECURSIVE, @@ -57,6 +62,7 @@ typedef enum MountMode { NOEXEC, EXEC, TMPFS, + EXTENSION_IMAGES, /* Mounted outside the root directory, and used by subsequent mounts */ READWRITE_IMPLICIT, /* Should have the lowest priority. 
*/ _MOUNT_MODE_MAX, } MountMode; @@ -205,6 +211,7 @@ static const MountEntry protect_system_strict_table[] = { static const char * const mount_mode_table[_MOUNT_MODE_MAX] = { [INACCESSIBLE] = "inaccessible", + [OVERLAY_MOUNT] = "overlay", [BIND_MOUNT] = "bind", [BIND_MOUNT_RECURSIVE] = "rbind", [PRIVATE_TMP] = "private-tmp", @@ -392,6 +399,101 @@ static int append_mount_images(MountEntry **p, const MountImage *mount_images, s return 0; } +static int append_extension_images( + MountEntry **p, + const char *root, + const char *extension_dir, + char **hierarchies, + const MountImage *mount_images, + size_t n) { + + _cleanup_strv_free_ char **overlays = NULL; + char **hierarchy; + int r; + + assert(p); + assert(extension_dir); + + if (n == 0) + return 0; + + /* Prepare a list of overlays, that will have as each element a string suitable for being + * passed as a lowerdir= parameter, so start with the hierarchy on the root. + * The overlays vector will have the same number of elements and will correspond to the + * hierarchies vector, so they can be iterated upon together. */ + STRV_FOREACH(hierarchy, hierarchies) { + _cleanup_free_ char *prefixed_hierarchy = NULL; + + prefixed_hierarchy = path_join(root, *hierarchy); + if (!prefixed_hierarchy) + return -ENOMEM; + + r = strv_consume(&overlays, TAKE_PTR(prefixed_hierarchy)); + if (r < 0) + return r; + } + + /* First, prepare a mount for each image, but these won't be visible to the unit, instead + * they will be mounted in our propagate directory, and used as a source for the overlay. 
*/ + for (size_t i = 0; i < n; i++) { + _cleanup_free_ char *mount_point = NULL; + const MountImage *m = mount_images + i; + + r = asprintf(&mount_point, "%s/%zu", extension_dir, i); + if (r < 0) + return -ENOMEM; + + for (size_t j = 0; hierarchies && hierarchies[j]; ++j) { + _cleanup_free_ char *prefixed_hierarchy = NULL, *escaped = NULL, *lowerdir = NULL; + + prefixed_hierarchy = path_join(mount_point, hierarchies[j]); + if (!prefixed_hierarchy) + return -ENOMEM; + + escaped = shell_escape(prefixed_hierarchy, ",:"); + if (!escaped) + return -ENOMEM; + + /* Note that lowerdir= parameters are in 'reverse' order, so the + * top-most directory in the overlay comes first in the list. */ + lowerdir = strjoin(escaped, ":", overlays[j]); + if (!lowerdir) + return -ENOMEM; + + free_and_replace(overlays[j], lowerdir); + } + + *((*p)++) = (MountEntry) { + .path_malloc = TAKE_PTR(mount_point), + .image_options = m->mount_options, + .ignore = m->ignore_enoent, + .source_const = m->source, + .mode = EXTENSION_IMAGES, + .has_prefix = true, + }; + } + + /* Then, for each hierarchy, prepare an overlay with the list of lowerdir= strings + * set up earlier. */ + for (size_t i = 0; hierarchies && hierarchies[i]; ++i) { + _cleanup_free_ char *prefixed_hierarchy = NULL; + + prefixed_hierarchy = path_join(root, hierarchies[i]); + if (!prefixed_hierarchy) + return -ENOMEM; + + *((*p)++) = (MountEntry) { + .path_malloc = TAKE_PTR(prefixed_hierarchy), + .options_malloc = TAKE_PTR(overlays[i]), + .mode = OVERLAY_MOUNT, + .has_prefix = true, + .ignore = true, /* If the source image doesn't set the ignore bit it will fail earlier. 
*/ + }; + } + + return 0; +} + static int append_tmpfs_mounts(MountEntry **p, const TemporaryFileSystem *tmpfs, size_t n) { assert(p); @@ -494,6 +596,12 @@ static int append_protect_system(MountEntry **p, ProtectSystem protect_system, b static int mount_path_compare(const MountEntry *a, const MountEntry *b) { int d; + /* EXTENSION_IMAGES will be used by other mounts as a base, so sort them first + * regardless of the prefix - they are set up in the propagate directory anyway */ + d = -CMP(a->mode == EXTENSION_IMAGES, b->mode == EXTENSION_IMAGES); + if (d != 0) + return d; + /* If the paths are not equal, then order prefixes first */ d = path_compare(mount_entry_path(a), mount_entry_path(b)); if (d != 0) @@ -640,7 +748,8 @@ static void drop_outside_root(const char *root_directory, MountEntry *m, size_t for (f = m, t = m; f < m + *n; f++) { - if (!path_startswith(mount_entry_path(f), root_directory)) { + /* ExtensionImages bases are opened in /run/systemd/unit-extensions on the host */ + if (f->mode != EXTENSION_IMAGES && !path_startswith(mount_entry_path(f), root_directory)) { log_debug("%s is outside of root directory.", mount_entry_path(f)); mount_entry_done(f); continue; @@ -1003,12 +1112,28 @@ static int mount_run(const MountEntry *m) { return mount_tmpfs(m); } -static int mount_image(const MountEntry *m) { +static int mount_image(const MountEntry *m, const char *root_directory) { + + _cleanup_free_ char *host_os_release_id = NULL, *host_os_release_version_id = NULL, + *host_os_release_sysext_level = NULL; int r; assert(m); - r = verity_dissect_and_mount(mount_entry_source(m), mount_entry_path(m), m->image_options); + if (m->mode == EXTENSION_IMAGES) { + r = parse_os_release( + empty_to_root(root_directory), + "ID", &host_os_release_id, + "VERSION_ID", &host_os_release_version_id, + "SYSEXT_LEVEL", &host_os_release_sysext_level, + NULL); + if (r < 0) + return log_debug_errno(r, "Failed to acquire 'os-release' data of OS tree '%s': %m", 
empty_to_root(root_directory)); + } + + r = verity_dissect_and_mount( + mount_entry_source(m), mount_entry_path(m), m->image_options, + host_os_release_id, host_os_release_version_id, host_os_release_sysext_level); if (r == -ENOENT && m->ignore) return 0; if (r < 0) @@ -1017,6 +1142,25 @@ static int mount_image(const MountEntry *m) { return 1; } +static int mount_overlay(const MountEntry *m) { + const char *options; + int r; + + assert(m); + + options = strjoina("lowerdir=", mount_entry_options(m)); + + (void) mkdir_p_label(mount_entry_path(m), 0755); + + r = mount_nofollow_verbose(LOG_DEBUG, "overlay", mount_entry_path(m), "overlay", MS_RDONLY, options); + if (r == -ENOENT && m->ignore) + return 0; + if (r < 0) + return r; + + return 1; +} + static int follow_symlink( const char *root_directory, MountEntry *m) { @@ -1173,7 +1317,13 @@ static int apply_one_mount( return mount_run(m); case MOUNT_IMAGES: - return mount_image(m); + return mount_image(m, NULL); + + case EXTENSION_IMAGES: + return mount_image(m, root_directory); + + case OVERLAY_MOUNT: + return mount_overlay(m); default: assert_not_reached("Unknown mode"); @@ -1317,6 +1467,8 @@ static size_t namespace_calculate_mounts( size_t n_bind_mounts, size_t n_temporary_filesystems, size_t n_mount_images, + size_t n_extension_images, + size_t n_hierarchies, const char* tmp_dir, const char* var_tmp_dir, const char *creds_path, @@ -1350,6 +1502,7 @@ static size_t namespace_calculate_mounts( strv_length(empty_directories) + n_bind_mounts + n_mount_images + + (n_extension_images > 0 ? n_hierarchies + n_extension_images : 0) + /* Mount each image plus an overlay per hierarchy */ n_temporary_filesystems + ns_info->private_dev + (ns_info->protect_kernel_tunables ? 
ELEMENTSOF(protect_kernel_tunables_table) : 0) + @@ -1415,7 +1568,8 @@ static int apply_mounts( if (m->applied) continue; - r = follow_symlink(root, m); + /* ExtensionImages are first opened in the propagate directory, not in the root_directory */ + r = follow_symlink(m->mode != EXTENSION_IMAGES ? root : NULL, m); if (r < 0) { if (error_path && mount_entry_path(m)) *error_path = strdup(mount_entry_path(m)); @@ -1618,6 +1772,8 @@ int setup_namespace( size_t root_hash_sig_size, const char *root_hash_sig_path, const char *verity_data_path, + const MountImage *extension_images, + size_t n_extension_images, const char *propagate_dir, const char *incoming_dir, const char *notify_socket, @@ -1628,9 +1784,10 @@ int setup_namespace( _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; _cleanup_(verity_settings_done) VeritySettings verity = VERITY_SETTINGS_DEFAULT; + _cleanup_strv_free_ char **hierarchies = NULL; MountEntry *m = NULL, *mounts = NULL; bool require_prefix = false, setup_propagate = false; - const char *root; + const char *root, *extension_dir = "/run/systemd/unit-extensions"; size_t n_mounts; int r; @@ -1711,6 +1868,12 @@ int setup_namespace( require_prefix = true; } + if (n_extension_images > 0) { + r = parse_env_extension_hierarchies(&hierarchies); + if (r < 0) + return r; + } + n_mounts = namespace_calculate_mounts( ns_info, read_write_paths, @@ -1722,6 +1885,8 @@ int setup_namespace( n_bind_mounts, n_temporary_filesystems, n_mount_images, + n_extension_images, + strv_length(hierarchies), tmp_dir, var_tmp_dir, creds_path, log_namespace, @@ -1789,6 +1954,10 @@ int setup_namespace( if (r < 0) goto finish; + r = append_extension_images(&m, root, extension_dir, hierarchies, extension_images, n_extension_images); + if (r < 0) + goto finish; + if (ns_info->private_dev) *(m++) = (MountEntry) { .path_const = "/dev", @@ -1948,6 +2117,12 @@ int setup_namespace( if 
(setup_propagate) (void) mkdir_p(propagate_dir, 0600); + if (n_extension_images > 0) { + /* ExtensionImages mountpoint directories will be created + * while parsing the mounts to create, so have the parent ready */ + (void) mkdir_p(extension_dir, 0600); + } + /* Remount / as SLAVE so that nothing now mounted in the namespace * shows up in the parent */ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { @@ -2114,9 +2289,11 @@ int mount_image_add(MountImage **m, size_t *n, const MountImage *item) { if (!s) return -ENOMEM; - d = strdup(item->destination); - if (!d) - return -ENOMEM; + if (item->destination) { + d = strdup(item->destination); + if (!d) + return -ENOMEM; + } LIST_FOREACH(mount_options, i, item->mount_options) { _cleanup_(mount_options_free_allp) MountOptions *o; @@ -2146,6 +2323,7 @@ int mount_image_add(MountImage **m, size_t *n, const MountImage *item) { .destination = TAKE_PTR(d), .mount_options = TAKE_PTR(options), .ignore_enoent = item->ignore_enoent, + .type = item->type, }; return 0; diff --git a/src/core/namespace.h b/src/core/namespace.h index 54d4985f80f..cb9d5a5d388 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -93,11 +93,19 @@ struct TemporaryFileSystem { char *options; }; +typedef enum MountImageType { + MOUNT_IMAGE_DISCRETE, + MOUNT_IMAGE_EXTENSION, + _MOUNT_IMAGE_TYPE_MAX, + _MOUNT_IMAGE_TYPE_INVALID = -EINVAL, +} MountImageType; + struct MountImage { char *source; - char *destination; + char *destination; /* Unused if MountImageType == MOUNT_IMAGE_EXTENSION */ LIST_HEAD(MountOptions, mount_options); bool ignore_enoent; + MountImageType type; }; int setup_namespace( @@ -129,6 +137,8 @@ int setup_namespace( size_t root_hash_sig_size, const char *root_hash_sig_path, const char *root_verity, + const MountImage *extension_images, + size_t n_extension_images, const char *propagate_dir, const char *incoming_dir, const char *notify_socket, diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index 
83130db2fa1..eaec48fd5e2 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1766,6 +1766,110 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con return 1; } + if (streq(field, "ExtensionImages")) { + const char *p = eq; + + r = sd_bus_message_open_container(m, SD_BUS_TYPE_STRUCT, "sv"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'v', "a(sba(ss))"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(sba(ss))"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *source = NULL, *tuple = NULL; + const char *q = NULL, *s = NULL; + bool permissive = false; + + r = extract_first_word(&p, &tuple, NULL, EXTRACT_UNQUOTE|EXTRACT_RETAIN_ESCAPE); + if (r < 0) + return r; + if (r == 0) + break; + + q = tuple; + r = extract_first_word(&q, &source, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS); + if (r < 0) + return r; + if (r == 0) + continue; + + s = source; + if (s[0] == '-') { + permissive = true; + s++; + } + + r = sd_bus_message_open_container(m, 'r', "sba(ss)"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "sb", s, permissive); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_open_container(m, 'a', "(ss)"); + if (r < 0) + return bus_log_create_error(r); + + for (;;) { + _cleanup_free_ char *partition = NULL, *mount_options = NULL; + + r = extract_many_words(&q, ":", EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS, &partition, &mount_options, NULL); + if (r < 0) + return r; + if (r == 0) + break; + /* Single set of options, applying to the root partition/single filesystem */ + if (r == 1) { + r = sd_bus_message_append(m, "(ss)", "root", partition); + if (r < 0) + return bus_log_create_error(r); + + break; + } + + if 
(partition_designator_from_string(partition) < 0) + return bus_log_create_error(-EINVAL); + + r = sd_bus_message_append(m, "(ss)", partition, mount_options); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + } + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_close_container(m); + if (r < 0) + return bus_log_create_error(r); + + return 1; + } + return 0; } diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c index 791d747136e..aa4c01bf6c7 100644 --- a/src/shared/dissect-image.c +++ b/src/shared/dissect-image.c @@ -27,6 +27,7 @@ #include "dissect-image.h" #include "dm-util.h" #include "env-file.h" +#include "extension-release.h" #include "fd-util.h" #include "fileio.h" #include "fs-util.h" @@ -2621,7 +2622,14 @@ static const char *const partition_designator_table[] = { [PARTITION_VAR] = "var", }; -int verity_dissect_and_mount(const char *src, const char *dest, const MountOptions *options) { +int verity_dissect_and_mount( + const char *src, + const char *dest, + const MountOptions *options, + const char *required_host_os_release_id, + const char *required_host_os_release_version_id, + const char *required_host_os_release_sysext_level) { + _cleanup_(loop_device_unrefp) LoopDevice *loop_device = NULL; _cleanup_(decrypted_image_unrefp) DecryptedImage *decrypted_image = NULL; _cleanup_(dissected_image_unrefp) DissectedImage *dissected_image = NULL; @@ -2683,6 +2691,30 @@ int verity_dissect_and_mount(const char *src, const char *dest, const MountOptio if (r < 0) return log_debug_errno(r, "Failed to mount image: %m"); + /* If we got os-release values from the caller, then we need to match them with the image's + * 
extension-release.d/ content. Return -EINVAL if there's any mismatch. + * First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if + * available, or else fallback to VERSION_ID. */ + if (required_host_os_release_id && + (required_host_os_release_version_id || required_host_os_release_sysext_level)) { + _cleanup_strv_free_ char **extension_release = NULL; + + r = load_extension_release_pairs(dest, dissected_image->image_name, &extension_release); + if (r < 0) + return log_debug_errno(r, "Failed to parse image %s extension-release metadata: %m", dissected_image->image_name); + + r = extension_release_validate( + dissected_image->image_name, + required_host_os_release_id, + required_host_os_release_version_id, + required_host_os_release_sysext_level, + extension_release); + if (r == 0) + return log_debug_errno(SYNTHETIC_ERRNO(ESTALE), "Image %s extension-release metadata does not match the root's", dissected_image->image_name); + if (r < 0) + return log_debug_errno(r, "Failed to compare image %s extension-release metadata with the root's os-release: %m", dissected_image->image_name); + } + if (decrypted_image) { r = decrypted_image_relinquish(decrypted_image); if (r < 0) diff --git a/src/shared/dissect-image.h b/src/shared/dissect-image.h index 89078500007..77e7c80c20a 100644 --- a/src/shared/dissect-image.h +++ b/src/shared/dissect-image.h @@ -164,4 +164,4 @@ bool dissected_image_has_verity(const DissectedImage *image, PartitionDesignator int mount_image_privately_interactively(const char *path, DissectImageFlags flags, char **ret_directory, LoopDevice **ret_loop_device, DecryptedImage **ret_decrypted_image); -int verity_dissect_and_mount(const char *src, const char *dest, const MountOptions *options); +int verity_dissect_and_mount(const char *src, const char *dest, const MountOptions *options, const char *required_host_os_release_id, const char *required_host_os_release_version_id, const char *required_host_os_release_sysext_level); diff 
--git a/src/shared/mount-util.c b/src/shared/mount-util.c index 183a686706e..576e4054c29 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -855,7 +855,7 @@ static int mount_in_namespace( mount_tmp_created = true; if (is_image) - r = verity_dissect_and_mount(chased_src, mount_tmp, options); + r = verity_dissect_and_mount(chased_src, mount_tmp, options, NULL, NULL, NULL); else r = mount_follow_verbose(LOG_DEBUG, chased_src, mount_tmp, NULL, MS_BIND, NULL); if (r < 0) diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index b4db78492ea..b162928482c 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -175,6 +175,8 @@ static void test_protect_kernel_logs(void) { NULL, NULL, NULL, + 0, + NULL, NULL, NULL, 0, diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 71ccfb88f42..761ee5da866 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -103,6 +103,8 @@ int main(int argc, char *argv[]) { NULL, NULL, NULL, + 0, + NULL, NULL, NULL, 0, diff --git a/test/TEST-50-DISSECT/test.sh b/test/TEST-50-DISSECT/test.sh index 2ff81bf4fa9..9d42e4891c1 100755 --- a/test/TEST-50-DISSECT/test.sh +++ b/test/TEST-50-DISSECT/test.sh @@ -9,6 +9,8 @@ TEST_INSTALL_VERITY_MINIMAL=1 . 
$TEST_BASE_DIR/test-functions +command -v mksquashfs >/dev/null 2>&1 || exit 0 +command -v veritysetup >/dev/null 2>&1 || exit 0 command -v sfdisk >/dev/null 2>&1 || exit 0 # Need loop devices for systemd-dissect @@ -17,6 +19,7 @@ test_append_files() { instmods loop =block instmods squashfs =squashfs instmods dm_verity =md + instmods overlay =overlayfs install_dmevent generate_module_dependencies inst_binary losetup diff --git a/test/fuzz/fuzz-unit-file/directives.service b/test/fuzz/fuzz-unit-file/directives.service index 0c7ded6786a..e2e8f61c67c 100644 --- a/test/fuzz/fuzz-unit-file/directives.service +++ b/test/fuzz/fuzz-unit-file/directives.service @@ -206,6 +206,7 @@ RootImage= RootHash= RootHashSignature= RootVerity= +ExtensionImages= RuntimeMaxSec= SELinuxContextFromNet= SecureBits= diff --git a/test/test-functions b/test/test-functions index 39df122ef23..b2a875ffce4 100644 --- a/test/test-functions +++ b/test/test-functions @@ -480,18 +480,20 @@ install_verity_minimal() { BASICTOOLS=( bash cat + grep mount sleep ) oldinitdir=$initdir rm -rfv $TESTDIR/minimal export initdir=$TESTDIR/minimal - mkdir -p $initdir/usr/lib/systemd/system $initdir/etc + mkdir -p $initdir/usr/lib/systemd/system $initdir/usr/lib/extension-release.d $initdir/etc $initdir/var/tmp $initdir/opt setup_basic_dirs install_basic_tools cp $os_release $initdir/usr/lib/os-release ln -s ../usr/lib/os-release $initdir/etc/os-release touch $initdir/etc/machine-id $initdir/etc/resolv.conf + touch $initdir/opt/some_file echo MARKER=1 >> $initdir/usr/lib/os-release echo -e "[Service]\nExecStartPre=cat /usr/lib/os-release\nExecStart=sleep 120" > $initdir/usr/lib/systemd/system/app0.service cp $initdir/usr/lib/systemd/system/app0.service $initdir/usr/lib/systemd/system/app0-foo.service @@ -507,6 +509,52 @@ install_verity_minimal() { mksquashfs $initdir $oldinitdir/usr/share/minimal_1.raw veritysetup format $oldinitdir/usr/share/minimal_1.raw $oldinitdir/usr/share/minimal_1.verity | \ grep '^Root 
hash:' | cut -f2 | tr -d '\n' > $oldinitdir/usr/share/minimal_1.roothash + + # Rolling distros like Arch do not set VERSION_ID + local version_id="" + if grep -q "^VERSION_ID=" $os_release; then + version_id="$(grep "^VERSION_ID=" $os_release)" + fi + + export initdir=$TESTDIR/app0 + mkdir -p $initdir/usr/lib/extension-release.d $initdir/usr/lib/systemd/system $initdir/opt + grep "^ID=" $os_release > $initdir/usr/lib/extension-release.d/extension-release.app0 + echo "${version_id}" >> $initdir/usr/lib/extension-release.d/extension-release.app0 + cat <<EOF > $initdir/usr/lib/systemd/system/app0.service +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/opt/script0.sh +EOF + cat <<EOF > $initdir/opt/script0.sh +#!/bin/bash +set -e +test -e /usr/lib/os-release +cat /usr/lib/extension-release.d/extension-release.app0 +EOF + chmod +x $initdir/opt/script0.sh + echo MARKER=1 > $initdir/usr/lib/systemd/system/some_file + mksquashfs $initdir $oldinitdir/usr/share/app0.raw + + export initdir=$TESTDIR/app1 + mkdir -p $initdir/usr/lib/extension-release.d $initdir/usr/lib/systemd/system $initdir/opt + grep "^ID=" $os_release > $initdir/usr/lib/extension-release.d/extension-release.app1 + echo "${version_id}" >> $initdir/usr/lib/extension-release.d/extension-release.app1 + cat <<EOF > $initdir/usr/lib/systemd/system/app1.service +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/opt/script1.sh +EOF + cat <<EOF > $initdir/opt/script1.sh +#!/bin/bash +set -e +test -e /usr/lib/os-release +cat /usr/lib/extension-release.d/extension-release.app1 +EOF + chmod +x $initdir/opt/script1.sh + echo MARKER=1 > $initdir/usr/lib/systemd/system/other_file + mksquashfs $initdir $oldinitdir/usr/share/app1.raw ) } diff --git a/test/units/testsuite-50.sh b/test/units/testsuite-50.sh index 1dd4b5dbd1c..f3781e6d15d 100755 --- a/test/units/testsuite-50.sh +++ b/test/units/testsuite-50.sh @@ -227,6 +227,27 @@ done systemctl is-active testservice-50d.service +# ExtensionImages will set up an overlay +systemd-run
-t --property ExtensionImages=/usr/share/app0.raw --property RootImage=${image}.raw cat /opt/script0.sh | grep -q -F "extension-release.app0" +systemd-run -t --property ExtensionImages=/usr/share/app0.raw --property RootImage=${image}.raw cat /usr/lib/systemd/system/some_file | grep -q -F "MARKER=1" +systemd-run -t --property ExtensionImages="/usr/share/app0.raw /usr/share/app1.raw" --property RootImage=${image}.raw cat /opt/script0.sh | grep -q -F "extension-release.app0" +systemd-run -t --property ExtensionImages="/usr/share/app0.raw /usr/share/app1.raw" --property RootImage=${image}.raw cat /usr/lib/systemd/system/some_file | grep -q -F "MARKER=1" +systemd-run -t --property ExtensionImages="/usr/share/app0.raw /usr/share/app1.raw" --property RootImage=${image}.raw cat /opt/script1.sh | grep -q -F "extension-release.app1" +systemd-run -t --property ExtensionImages="/usr/share/app0.raw /usr/share/app1.raw" --property RootImage=${image}.raw cat /usr/lib/systemd/system/other_file | grep -q -F "MARKER=1" +cat >/run/systemd/system/testservice-50e.service </testok exit 0