From 1db98c0c18309f74805436467d3f46cea10f81db Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 15:57:37 +0800 Subject: [PATCH 01/27] prepare-sysroot: Bind var under /run instead of inplace --- src/libostree/ostree-impl-system-generator.c | 46 +------------------- src/switchroot/ostree-prepare-root.c | 31 ++++++------- 2 files changed, 15 insertions(+), 62 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index 6968c738..3fe1d67d 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -126,34 +126,6 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha #endif } -// Resolve symlink to return osname -static gboolean -_ostree_sysroot_parse_bootlink_aboot (const char *bootlink, char **out_osname, GError **error) -{ - static gsize regex_initialized; - static GRegex *regex; - g_autofree char *symlink_val = glnx_readlinkat_malloc (-1, bootlink, NULL, error); - if (!symlink_val) - return glnx_prefix_error (error, "Failed to read '%s' symlink", bootlink); - - if (g_once_init_enter (®ex_initialized)) - { - regex = g_regex_new ("^deploy/([^/]+)/", 0, 0, NULL); - g_assert (regex); - g_once_init_leave (®ex_initialized, 1); - } - - g_autoptr (GMatchInfo) match = NULL; - if (!g_regex_match (regex, symlink_val, 0, &match)) - return glnx_throw (error, - "Invalid aboot symlink in /ostree, expected symlink to resolve to " - "deploy/OSNAME/... instead it resolves to '%s'", - symlink_val); - - *out_osname = g_match_info_fetch (match, 1); - return TRUE; -} - /* Generate var.mount */ static gboolean fstab_generator (const char *ostree_target, const bool is_aboot, const char *normal_dir, @@ -166,20 +138,6 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor static const char fstab_path[] = "/etc/fstab"; static const char var_path[] = "/var"; - /* Written by ostree-sysroot-deploy.c. 
We parse out the stateroot here since we - * need to know it to mount /var. Unfortunately we can't easily use the - * libostree API to find the booted deployment since /boot might not have been - * mounted yet. - */ - g_autofree char *stateroot = NULL; - if (is_aboot) - { - if (!_ostree_sysroot_parse_bootlink_aboot (ostree_target, &stateroot, error)) - return glnx_prefix_error (error, "Parsing aboot stateroot"); - } - else if (!_ostree_sysroot_parse_bootlink (ostree_target, NULL, &stateroot, NULL, NULL, error)) - return glnx_prefix_error (error, "Parsing stateroot"); - /* Load /etc/fstab if it exists, and look for a /var mount */ g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re"); gboolean found_var_mnt = FALSE; @@ -219,7 +177,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor return FALSE; /* Generate our bind mount unit */ - const char *stateroot_var_path = glnx_strjoina ("/sysroot/ostree/deploy/", stateroot, "/var"); + const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; g_auto (GLnxTmpfile) tmpf = { 0, @@ -253,7 +211,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor "Where=%s\n" "What=%s\n" "Options=bind,slave,shared\n", - var_path, stateroot_var_path)) + var_path, var_dir)) return FALSE; if (!g_output_stream_flush (outstream, cancellable, error)) return FALSE; diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 8e161be7..b0518b08 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -393,11 +393,8 @@ main (int argc, char *argv[]) // however, see // https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36 // which bumps up these defaults for the rootfs a bit. - g_autofree char *root_upperdir - = root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/upper", NULL) - : NULL; - g_autofree char *root_workdir - = root_transient ? 
g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/work", NULL) : NULL; + const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper"; + const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work"; // Propagate these options for transient root, if provided if (root_transient) @@ -611,17 +608,15 @@ main (int argc, char *argv[]) err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr"); } - /* Prepare /var. - * When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself) - * so that the stateroot location stays writable. */ - if (sysroot_readonly) - { - /* Bind-mount /var (at stateroot path), and remount as writable. */ - if (mount ("../../var", "../../var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", srcpath); - if (mount ("../../var", "../../var", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", srcpath); - } + const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; + + /* Bind-mount /var, and remount as writable. */ + if (mkdirat (AT_FDCWD, var_dir, 0) < 0) + err (EXIT_FAILURE, "failed to mkdir %s", var_dir); + if (mount ("../../var", var_dir, NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", var_dir); + if (mount (var_dir, var_dir, NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", var_dir); /* When running under systemd, /var will be handled by a 'var.mount' unit outside * of initramfs. 
@@ -640,8 +635,8 @@ main (int argc, char *argv[]) */ if (mount_var) { - if (mount ("../../var", TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to bind mount ../../var to var"); + if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to bind mount /var"); /* To avoid having submounts of /var propagate into $stateroot/var, the * mount is made with slave+shared propagation. See the comment in From bfb84a315459a7ff632c6d04a51ad98878e48748 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 21:16:15 +0800 Subject: [PATCH 02/27] prepare-root: Unmount temporary var mount after /var is mounted --- src/libostree/ostree-impl-system-generator.c | 128 +++++++++++-------- src/switchroot/ostree-prepare-root.c | 5 +- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index 3fe1d67d..dc502b28 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -126,6 +126,35 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha #endif } +static gboolean +write_unit_file (int dir_fd, const char *path, GCancellable *cancellable, GError **error, const char *fmt, ...) 
+{ + g_auto (GLnxTmpfile) tmpf = { + 0, + }; + if (!glnx_open_tmpfile_linkable_at (dir_fd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error)) + return FALSE; + g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE); + gsize bytes_written; + va_list args; + va_start (args, fmt); + const gboolean r = g_output_stream_vprintf (outstream, &bytes_written, cancellable, error, fmt, args); + va_end (args); + if (!r) + return FALSE; + if (!g_output_stream_flush (outstream, cancellable, error)) + return FALSE; + g_clear_object (&outstream); + /* It should be readable */ + if (!glnx_fchmod (tmpf.fd, 0644, error)) + return FALSE; + /* Error out if somehow it already exists, that'll help us debug conflicts */ + if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, dir_fd, path, + error)) + return FALSE; + return TRUE; +} + /* Generate var.mount */ static gboolean fstab_generator (const char *ostree_target, const bool is_aboot, const char *normal_dir, @@ -135,8 +164,37 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor /* Not currently cancellable, but define a var in case we care later */ GCancellable *cancellable = NULL; /* Some path constants to avoid typos */ - static const char fstab_path[] = "/etc/fstab"; - static const char var_path[] = "/var"; + const char *fstab_path = "/etc/fstab"; + const char *var_dst = "/var"; + const char *var_src = OTCORE_RUN_OSTREE_PRIVATE "/var"; + + /* Prepare to write to the output unit dir; we use the "normal" dir + * that overrides /usr, but not /etc. 
+ */ + glnx_autofd int normal_dir_dfd = -1; + if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error)) + return FALSE; + + /* Generate a unit to unmount var_src */ + if (!write_unit_file (normal_dir_dfd, "ostree-unmount-temp-var.service", cancellable, error, + "##\n# Automatically generated by ostree-system-generator\n##\n\n" + "[Unit]\n" + "Documentation=man:ostree(1)\n" + "ConditionPathIsMountPoint=%s\n" + "After=var.mount\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "ExecStart=/usr/bin/umount --lazy %s\n", + var_src, var_src)) + return FALSE; + + if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "local-fs.target.wants", 0755, cancellable, + error)) + return FALSE; + if (symlinkat ("../ostree-unmount-temp-var.service", normal_dir_dfd, + "local-fs.target.wants/ostree-unmount-temp-var.service") < 0) + return glnx_throw_errno_prefix (error, "symlinkat"); /* Load /etc/fstab if it exists, and look for a /var mount */ g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re"); @@ -157,7 +215,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor path_kill_slashes (where); /* We're only looking for /var here */ - if (strcmp (where, var_path) != 0) + if (strcmp (where, var_dst) != 0) continue; found_var_mnt = TRUE; @@ -169,59 +227,19 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor if (found_var_mnt) return TRUE; - /* Prepare to write to the output unit dir; we use the "normal" dir - * that overrides /usr, but not /etc. 
- */ - glnx_autofd int normal_dir_dfd = -1; - if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error)) - return FALSE; - /* Generate our bind mount unit */ - const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; - - g_auto (GLnxTmpfile) tmpf = { - 0, - }; - if (!glnx_open_tmpfile_linkable_at (normal_dir_dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error)) - return FALSE; - g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE); - gsize bytes_written; - /* This code is inspired by systemd's fstab-generator.c. - * - * Note that our unit doesn't run if systemd.volatile is enabled; - * see https://github.com/ostreedev/ostree/pull/856 - * - * To avoid having submounts of /var propagate into $stateroot/var, the mount - * is made with slave+shared propagation. This means that /var will receive - * mount events from the parent /sysroot mount, but not vice versa. Adding a - * shared peer group below the slave group means that submounts of /var will - * inherit normal shared propagation. See mount_namespaces(7), Linux - * Documentation/filesystems/sharedsubtree.txt and - * https://github.com/ostreedev/ostree/issues/2086. This also happens in - * ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case. 
- */ - if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error, - "##\n# Automatically generated by ostree-system-generator\n##\n\n" - "[Unit]\n" - "Documentation=man:ostree(1)\n" - "ConditionKernelCommandLine=!systemd.volatile\n" - "Before=local-fs.target\n" - "\n" - "[Mount]\n" - "Where=%s\n" - "What=%s\n" - "Options=bind,slave,shared\n", - var_path, var_dir)) - return FALSE; - if (!g_output_stream_flush (outstream, cancellable, error)) - return FALSE; - g_clear_object (&outstream); - /* It should be readable */ - if (!glnx_fchmod (tmpf.fd, 0644, error)) - return FALSE; - /* Error out if somehow it already exists, that'll help us debug conflicts */ - if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, normal_dir_dfd, "var.mount", - error)) + if (!write_unit_file (normal_dir_dfd, "var.mount", cancellable, error, + "##\n# Automatically generated by ostree-system-generator\n##\n\n" + "[Unit]\n" + "Documentation=man:ostree(1)\n" + "ConditionKernelCommandLine=!systemd.volatile\n" + "Before=local-fs.target\n" + "\n" + "[Mount]\n" + "Where=%s\n" + "What=%s\n" + "Options=bind,slave,shared\n", + var_dst, var_src)) return FALSE; /* And ensure it's required; newer systemd will auto-inject fs dependencies diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index b0518b08..5d3504c7 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -636,7 +636,10 @@ main (int argc, char *argv[]) if (mount_var) { if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to bind mount /var"); + err (EXIT_FAILURE, "failed to bind mount %s to /var", var_dir); + + if (umount2 (var_dir, MNT_DETACH) < 0) + err (EXIT_FAILURE, "failed to umount %s", var_dir); /* To avoid having submounts of /var propagate into $stateroot/var, the * mount is made with slave+shared propagation. 
See the comment in From cb03e4466e30fdf20008316c4202ea59b7f2fc4f Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Fri, 20 Dec 2024 18:55:47 +0800 Subject: [PATCH 03/27] prepare-root: It's not necessary to make /var slave anymore --- src/libostree/ostree-impl-system-generator.c | 2 +- src/switchroot/ostree-prepare-root.c | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index dc502b28..6653b826 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -238,7 +238,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor "[Mount]\n" "Where=%s\n" "What=%s\n" - "Options=bind,slave,shared\n", + "Options=bind\n", var_dst, var_src)) return FALSE; diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 5d3504c7..70b1a887 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -640,16 +640,6 @@ main (int argc, char *argv[]) if (umount2 (var_dir, MNT_DETACH) < 0) err (EXIT_FAILURE, "failed to umount %s", var_dir); - - /* To avoid having submounts of /var propagate into $stateroot/var, the - * mount is made with slave+shared propagation. See the comment in - * ostree-impl-system-generator.c when /var isn't mounted in the - * initramfs for further explanation. 
- */ - if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SLAVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to change /var to slave mount"); - if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SHARED | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to change /var to slave+shared mount"); } /* This can be used by other things to signal ostree is in use */ From 91dda713d76e1b3cf2153d38162101ad61d6a757 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 15:59:11 +0800 Subject: [PATCH 04/27] prepare-sysroot: Add sysroot.invisible --- src/libotcore/otcore.h | 2 + src/switchroot/ostree-prepare-root.c | 98 ++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/libotcore/otcore.h b/src/libotcore/otcore.h index 2d256c80..776ec4e8 100644 --- a/src/libotcore/otcore.h +++ b/src/libotcore/otcore.h @@ -101,6 +101,8 @@ ComposefsConfig *otcore_load_composefs_config (const char *cmdline, GKeyFile *co #define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed" // This key will be present if the root is transient #define OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT "root.transient" +// This key will be present if the sysroot is made invisible +#define OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE "sysroot-invisible" // This key will be present if the sysroot-ro flag was found #define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro" // Always holds the (device, inode) pair of the booted deployment diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 70b1a887..781c7a39 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -77,6 +78,7 @@ #include "otcore.h" #define SYSROOT_KEY "sysroot" +#define INVISIBLE_KEY "invisible" #define READONLY_KEY "readonly" /* This key configures the / mount in the deployment root */ @@ -254,6 +256,33 @@ composefs_error_message (int errsv) #endif 
+static int +invisible_helper (void*) +{ + if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0) + return EXIT_FAILURE; + + if (chdir ("sysroot") < 0) + return EXIT_FAILURE; + + if (mount (".", "/", NULL, MS_BIND | MS_SILENT, NULL) < 0) + return EXIT_FAILURE; + + if (chroot (".") < 0) + return EXIT_FAILURE; + + sigset_t sigset; + sigemptyset (&sigset); + sigaddset (&sigset, SIGUSR1); + while (sigwaitinfo (&sigset, NULL) < 0) + { + if (errno != EINTR) + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + int main (int argc, char *argv[]) { @@ -280,13 +309,18 @@ main (int argc, char *argv[]) if (!config) errx (EXIT_FAILURE, "Failed to parse config: %s", error->message); - gboolean sysroot_readonly = FALSE; gboolean root_transient = FALSE; + gboolean sysroot_invisible = FALSE; + gboolean sysroot_readonly = FALSE; if (!ot_keyfile_get_boolean_with_default (config, ROOT_KEY, TRANSIENT_KEY, FALSE, &root_transient, &error)) return FALSE; + if (!ot_keyfile_get_boolean_with_default (config, SYSROOT_KEY, INVISIBLE_KEY, FALSE, &sysroot_invisible, + &error)) + return FALSE; + // We always parse the composefs config, because we want to detect and error // out if it's enabled, but not supported at compile time. 
g_autoptr (ComposefsConfig) composefs_config @@ -503,7 +537,9 @@ main (int argc, char *argv[]) g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT, g_variant_new_boolean (root_transient)); - /* Pass on the state for use by ostree-prepare-root */ + g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE, + g_variant_new_boolean (sysroot_invisible)); + g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_SYSROOT_RO, g_variant_new_boolean (sysroot_readonly)); @@ -672,12 +708,60 @@ main (int argc, char *argv[]) if (rmdir (TMP_SYSROOT) < 0) err (EXIT_FAILURE, "couldn't remove temporary sysroot %s", TMP_SYSROOT); - /* Now that we've set up all the mount points, if configured we remount the physical - * rootfs as read-only; what is visibly mutable to the OS by default is just /etc and /var. - * But ostree knows how to mount /boot and /sysroot read-write to perform operations. - */ - if (sysroot_readonly) + if (sysroot_invisible) { + /* Keep a living sysroot in a private mount namespace, + * and unmount sysroot in the root mount namespace to make it invisible. 
+ */ + const char *sysroot_ns = OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns"; + glnx_autofd int ns_fd = open (sysroot_ns, O_WRONLY | O_CREAT, 0); + if (ns_fd < 0) + err (EXIT_FAILURE, "failed to create %s", sysroot_ns); + + const gsize stack_size = 0x8000; + g_autofree void *stack = g_malloc (stack_size); + + /* Block signals */ + sigset_t oldset, newset; + sigfillset (&newset); + sigprocmask (SIG_SETMASK, &newset, &oldset); + + int pid = clone (invisible_helper, (char*)stack + stack_size, CLONE_VM | CLONE_NEWNS | SIGCHLD, NULL); + + sigprocmask (SIG_SETMASK, &oldset, NULL); + + if (pid < 0) + err (EXIT_FAILURE, "failed to create child process"); + + /* Bind mount the private mount namespace */ + g_autofree char *ns = g_strdup_printf ("/proc/%d/ns/mnt", pid); + if (mount (ns, sysroot_ns, NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to bind-mount sysroot-ns"); + + /* Finish child process */ + kill (pid, SIGUSR1); + + /* Wait child process to exit. */ + int status; + while (waitpid (pid, &status, 0) < 0) + { + if (errno != EINTR) + err (EXIT_FAILURE, "waitpid failed"); + } + + if (!WIFEXITED (status) || WEXITSTATUS (status) != EXIT_SUCCESS) + err (EXIT_FAILURE, "child exited abnormally"); + + /* Unmount /sysroot */ + if (umount2 ("sysroot", MNT_DETACH) < 0) + err (EXIT_FAILURE, "failed to unmount /sysroot"); + } + else if (sysroot_readonly) + { + /* Now that we've set up all the mount points, if configured we remount the physical + * rootfs as read-only; what is visibly mutable to the OS by default is just /etc and /var. + * But ostree knows how to mount /boot and /sysroot read-write to perform operations. 
+ */ if (mount ("sysroot", "sysroot", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL) < 0) err (EXIT_FAILURE, "failed to make /sysroot read-only"); From 412ec417448fa7662eda1a800fc19072faf137c2 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 16:44:21 +0800 Subject: [PATCH 05/27] remount: Skip remount /sysroot if invisible --- src/switchroot/ostree-remount.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index f0a4b3d9..b816d61e 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -225,13 +225,19 @@ main (int argc, char *argv[]) exit (EXIT_SUCCESS); } - /* Handle remounting /sysroot; if it's explicitly marked as read-only (opt in) + /* Handle remounting /sysroot; + * If it's made invisible, do nothing. + * if it's explicitly marked as read-only (opt in) * then ensure it's readonly, otherwise mount writable, the same as / */ + gboolean sysroot_configured_invisible = FALSE; + g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE, "b", + &sysroot_configured_invisible); gboolean sysroot_configured_readonly = FALSE; g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_SYSROOT_RO, "b", &sysroot_configured_readonly); - do_remount ("/sysroot", !sysroot_configured_readonly); + if (!sysroot_configured_invisible) + do_remount ("/sysroot", !sysroot_configured_readonly); /* And also make sure to make /etc rw again. 
We make this conditional on * sysroot_configured_readonly && !transient_etc because only in that case is it a From a762caeb308483e69769d39b86ed4f922d973dbe Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 19:50:55 +0800 Subject: [PATCH 06/27] sysroot: Handle invisible sysroot --- src/libostree/ostree-sysroot-private.h | 6 + src/libostree/ostree-sysroot.c | 146 ++++++++++++++++++++----- 2 files changed, 125 insertions(+), 27 deletions(-) diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 3dd6939b..596cb8bc 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -114,8 +114,14 @@ struct OstreeSysroot // Relative to /boot, consumed by ostree-boot-complete.service #define _OSTREE_FINALIZE_STAGED_FAILURE_PATH "ostree/finalize-failure.stamp" +gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error); + gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error); +gboolean +_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancellable, + GError **error); + void _ostree_sysroot_emit_journal_msg (OstreeSysroot *self, const char *msg); gboolean _ostree_sysroot_read_boot_loader_configs (OstreeSysroot *self, int bootversion, diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 925c66a7..86c6026c 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -254,6 +254,47 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self) self->mount_namespace_in_use = TRUE; } +gboolean +_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancellable, + GError **error) +{ + /* Do nothing if we're already in mount namespace */ + if (self->mount_namespace_in_use) + return TRUE; + + /* Do nothing if we're not privileged */ + if (getuid () != 0) + return TRUE; + + /* We also assume operating on non-booted roots won't have a readonly 
sysroot */ + if (!self->root_is_ostree_booted) + return TRUE; + + g_autofree char *mntns_pid1 + = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error); + if (!mntns_pid1) + return glnx_prefix_error (error, "Reading /proc/1/ns/mnt"); + g_autofree char *mntns_self + = glnx_readlinkat_malloc (AT_FDCWD, "/proc/self/ns/mnt", cancellable, error); + if (!mntns_self) + return glnx_prefix_error (error, "Reading /proc/self/ns/mnt"); + + // If the mount namespaces are the same, we need to unshare(). + if (strcmp (mntns_pid1, mntns_self) == 0) + { + if (unshare (CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)"); + + /* Ensure what we do in our mount namespace do not leak to outside */ + if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0) + return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private"); + } + + ostree_sysroot_set_mount_namespace_in_use (self); + + return TRUE; +} + /** * ostree_sysroot_initialize_with_mount_namespace: * @@ -284,32 +325,7 @@ ostree_sysroot_initialize_with_mount_namespace (OstreeSysroot *self, GCancellabl if (!ostree_sysroot_initialize (self, error)) return FALSE; - /* Do nothing if we're not privileged */ - if (getuid () != 0) - return TRUE; - - /* We also assume operating on non-booted roots won't have a readonly sysroot */ - if (!self->root_is_ostree_booted) - return TRUE; - - g_autofree char *mntns_pid1 - = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error); - if (!mntns_pid1) - return glnx_prefix_error (error, "Reading /proc/1/ns/mnt"); - g_autofree char *mntns_self - = glnx_readlinkat_malloc (AT_FDCWD, "/proc/self/ns/mnt", cancellable, error); - if (!mntns_self) - return glnx_prefix_error (error, "Reading /proc/self/ns/mnt"); - - // If the mount namespaces are the same, we need to unshare(). 
- if (strcmp (mntns_pid1, mntns_self) == 0) - { - if (unshare (CLONE_NEWNS) < 0) - return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)"); - } - - ostree_sysroot_set_mount_namespace_in_use (self); - return TRUE; + return _ostree_sysroot_enter_mount_namespace (self, cancellable, error); } /** @@ -374,11 +390,75 @@ remount_writable (const char *path, gboolean *did_remount, GError **error) return TRUE; } +static gboolean +is_sysroot_invisible (void) +{ + struct stat stbuf; + + if (lstat (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &stbuf) < 0) + return FALSE; + + if (lstat ("/sysroot/ostree", &stbuf) == 0) + return FALSE; + + return TRUE; +} + +/* Unmount covering tmpfs to make /sysroot visible */ +gboolean +_ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) +{ + if (!ostree_sysroot_initialize (self, error)) + return FALSE; + + /* Do nothing if no mount namespace is in use */ + if (!self->mount_namespace_in_use) + return TRUE; + + /* If we aren't operating on a booted system, then we don't + * do anything with mounts. 
+ */ + if (!self->root_is_ostree_booted) + return TRUE; + + /* Handle invisible sysroot */ + if (is_sysroot_invisible ()) + { + glnx_autofd int sysroot_ns_fd = open (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", O_RDONLY); + if (sysroot_ns_fd < 0) + return FALSE; + + glnx_autofd int cur_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + if (cur_ns_fd < 0) + return FALSE; + + if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) + return FALSE; + + glnx_autofd int tree_fd = open_tree (AT_FDCWD, "/", OPEN_TREE_CLONE); + if (tree_fd < 0) + return FALSE; + + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + abort (); // it's unsafe to continue if we cannot switch back + + if (move_mount (tree_fd, "", AT_FDCWD, "/sysroot", MOVE_MOUNT_F_EMPTY_PATH) < 0) + return FALSE; + } + + /* Now close and reopen our file descriptors */ + ostree_sysroot_unload (self); + if (!ensure_sysroot_fd (self, error)) + return FALSE; + + return TRUE; +} + /* Remount /sysroot read-write if necessary */ gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) { - if (!ostree_sysroot_initialize (self, error)) + if (!_ostree_sysroot_ensure_visible (self, error)) return FALSE; /* Do nothing if no mount namespace is in use */ @@ -1063,6 +1143,18 @@ ostree_sysroot_initialize (OstreeSysroot *self, GError **error) g_debug ("root_is_ostree_booted: %d", self->root_is_ostree_booted); self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT; } + else + { + return TRUE; + } + + if (is_sysroot_invisible ()) + { + if (!_ostree_sysroot_enter_mount_namespace (self, NULL, error)) + return FALSE; + if (!_ostree_sysroot_ensure_visible (self, error)) + return FALSE; + } return TRUE; } From b7e46b4e5401257abb784faca558fdbca69f2fb8 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 21:00:22 +0800 Subject: [PATCH 07/27] ostree: Add command admin nsenter --- Makefile-ostree.am | 1 + src/ostree/ot-admin-builtin-nsenter.c | 126 ++++++++++++++++++++++++++ src/ostree/ot-admin-builtins.h | 1 + 
src/ostree/ot-builtin-admin.c | 7 +- 4 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 src/ostree/ot-admin-builtin-nsenter.c diff --git a/Makefile-ostree.am b/Makefile-ostree.am index d2447ffe..3df930de 100644 --- a/Makefile-ostree.am +++ b/Makefile-ostree.am @@ -86,6 +86,7 @@ ostree_SOURCES += \ src/ostree/ot-admin-builtin-upgrade.c \ src/ostree/ot-admin-builtin-unlock.c \ src/ostree/ot-admin-builtin-state-overlay.c \ + src/ostree/ot-admin-builtin-nsenter.c \ src/ostree/ot-admin-builtins.h \ src/ostree/ot-admin-instutil-builtin-selinux-ensure-labeled.c \ src/ostree/ot-admin-instutil-builtin-set-kargs.c \ diff --git a/src/ostree/ot-admin-builtin-nsenter.c b/src/ostree/ot-admin-builtin-nsenter.c new file mode 100644 index 00000000..fab7964e --- /dev/null +++ b/src/ostree/ot-admin-builtin-nsenter.c @@ -0,0 +1,126 @@ +/* +* Copyright (C) 2024 Colin Walters + * + * SPDX-License-Identifier: LGPL-2.0+ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . 
+ * + * Author: Misaki Kasumi + */ + +#include "config.h" + +#include "libglnx.h" +#include "ostree.h" +#include "ot-admin-builtins.h" +#include "ot-admin-functions.h" + +#include +#include + +static gboolean opt_lock; +static gboolean opt_exec; + +static GOptionEntry options[] = { + { "lock", 0, 0, G_OPTION_ARG_NONE, &opt_lock, + "Make /sysroot writable in the mount namespace and acquire an exclusive multi-process write lock", NULL }, + { "exec", 0, 0, G_OPTION_ARG_NONE, &opt_exec, + "Replace the process instead of spawning the program as child", NULL}, + { NULL } }; + +gboolean +ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invocation, + GCancellable *cancellable, GError **error) +{ + g_autoptr (GOptionContext) context = NULL; + g_autoptr (OstreeSysroot) sysroot = NULL; + g_autofree char **arguments = NULL; + + context = g_option_context_new ("[PROGRAM [ARGUMENTS...]]"); + + int new_argc = argc; + char **new_argv = argv; + + for (int i = 1; i < argc; i++) + { + if (g_str_equal (argv[i], "--")) + { + new_argc -= i; + argc = i; + new_argv = argv + i; + argv[i] = NULL; + break; + } + } + + if (!ostree_admin_option_context_parse (context, options, &argc, &argv, + OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED, invocation, &sysroot, + cancellable, error)) + return FALSE; + + argc = new_argc; + argv = new_argv; + if (argc <= 1) + { + arguments = g_malloc_n (2, sizeof (char *)); + if ((arguments[0] = getenv ("SHELL")) == NULL) + arguments[0] = "/bin/sh"; + arguments[1] = NULL; + } + else + { + arguments = g_malloc_n (argc, sizeof (char *)); + memcpy (arguments, argv + 1, (argc - 1) * sizeof (char *)); + arguments[argc - 1] = NULL; + } + + if (opt_lock) + { + if (opt_exec) + return glnx_throw (error, "cannot specify both --lock and --exec"); + if (!ostree_sysroot_lock (sysroot, error)) + return FALSE; + } + + pid_t child_pid; + if (opt_exec) + { + if (execvp (arguments[0], arguments) < 0) + return glnx_throw_errno_prefix (error, "execvp"); + } + else + { 
+ if (posix_spawnp (&child_pid, arguments[0], NULL, NULL, arguments, environ) != 0) + return glnx_throw_errno_prefix (error, "posix_spawnp"); + } + + int status; + while (waitpid (child_pid, &status, 0) < 0) + { + if (errno != EINTR) + return glnx_throw_errno_prefix (error, "waitpid"); + } + + if (opt_lock) + ostree_sysroot_unlock (sysroot); + + if (!WIFEXITED (status)) + return glnx_throw (error, "child process killed by signal"); + + int exit_status = WEXITSTATUS (status); + if (exit_status != EXIT_SUCCESS) + exit (exit_status); + + return TRUE; +} diff --git a/src/ostree/ot-admin-builtins.h b/src/ostree/ot-admin-builtins.h index cd1472bf..9f19345f 100644 --- a/src/ostree/ot-admin-builtins.h +++ b/src/ostree/ot-admin-builtins.h @@ -51,6 +51,7 @@ BUILTINPROTO (kargs); BUILTINPROTO (post_copy); BUILTINPROTO (lock_finalization); BUILTINPROTO (state_overlay); +BUILTINPROTO (nsenter); #undef BUILTINPROTO diff --git a/src/ostree/ot-builtin-admin.c b/src/ostree/ot-builtin-admin.c index 53face6a..b5f0395c 100644 --- a/src/ostree/ot-builtin-admin.c +++ b/src/ostree/ot-builtin-admin.c @@ -70,6 +70,8 @@ static OstreeCommand admin_subcommands[] = { { "upgrade", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_upgrade, "Construct new tree from current origin and deploy it, if it changed" }, { "kargs", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_kargs, "Change kernel arguments" }, + {"nsenter", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN, ot_admin_builtin_nsenter, + "Run program in the mount namespace where /sysroot is present"}, { NULL, 0, NULL, NULL } }; @@ -121,11 +123,6 @@ ostree_builtin_admin (int argc, char **argv, OstreeCommandInvocation *invocation } } - else if (g_str_equal (argv[in], "--")) - { - break; - } - argv[out] = argv[in]; } From c5c414a921a557f81cc850decaf3c70b369900a4 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 23:48:41 +0800 Subject: [PATCH 08/27] chore: Use geteuid() instead of getuid() to check privilege --- 
src/libostree/ostree-bootloader-zipl.c | 2 +- src/libostree/ostree-repo-commit.c | 2 +- src/libostree/ostree-sysroot.c | 2 +- src/libotutil/ot-unix-utils.c | 7 +++++++ src/libotutil/ot-unix-utils.h | 2 ++ src/ostree/ot-main.c | 4 ++-- 6 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/libostree/ostree-bootloader-zipl.c b/src/libostree/ostree-bootloader-zipl.c index 2804ed26..f0c18cbc 100644 --- a/src/libostree/ostree-bootloader-zipl.c +++ b/src/libostree/ostree-bootloader-zipl.c @@ -432,7 +432,7 @@ _ostree_bootloader_zipl_post_bls_sync (OstreeBootloader *bootloader, int bootver // This can happen in a unit testing environment; at some point what we want to do here // is move all of the zipl logic to a systemd unit instead that's keyed of // ostree-finalize-staged.service. - if (getuid () != 0) + if (!ot_util_process_privileged ()) return TRUE; // If we're in a booted deployment, we don't need to spawn a container. diff --git a/src/libostree/ostree-repo-commit.c b/src/libostree/ostree-repo-commit.c index 17b8a97f..18b2562c 100644 --- a/src/libostree/ostree-repo-commit.c +++ b/src/libostree/ostree-repo-commit.c @@ -1658,7 +1658,7 @@ ostree_repo_prepare_transaction (OstreeRepo *self, gboolean *out_transaction_res self->reserved_blocks = reserved_bytes / self->txn.blocksize; /* Use the appropriate free block count if we're unprivileged */ - guint64 bfree = (getuid () != 0 ? stvfsbuf.f_bavail : stvfsbuf.f_bfree); + guint64 bfree = (ot_util_process_privileged () ? 
stvfsbuf.f_bfree : stvfsbuf.f_bavail); if (bfree > self->reserved_blocks) self->txn.max_blocks = bfree - self->reserved_blocks; else diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 86c6026c..0e8fc07c 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -263,7 +263,7 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancel return TRUE; /* Do nothing if we're not privileged */ - if (getuid () != 0) + if (!ot_util_process_privileged ()) return TRUE; /* We also assume operating on non-booted roots won't have a readonly sysroot */ diff --git a/src/libotutil/ot-unix-utils.c b/src/libotutil/ot-unix-utils.c index 33cd1c02..7a3192fe 100644 --- a/src/libotutil/ot-unix-utils.c +++ b/src/libotutil/ot-unix-utils.c @@ -102,3 +102,10 @@ ot_util_path_split_validate (const char *path, GPtrArray **out_components, GErro ot_transfer_out_value (out_components, &ret_components); return TRUE; } + +/* Check if current process is privileged */ +gboolean +ot_util_process_privileged (void) +{ + return geteuid() == 0; +} diff --git a/src/libotutil/ot-unix-utils.h b/src/libotutil/ot-unix-utils.h index 3e4be2f9..38f73e49 100644 --- a/src/libotutil/ot-unix-utils.h +++ b/src/libotutil/ot-unix-utils.h @@ -39,4 +39,6 @@ gboolean ot_util_filename_validate (const char *name, GError **error); gboolean ot_util_path_split_validate (const char *path, GPtrArray **out_components, GError **error); +gboolean ot_util_process_privileged (void); + G_END_DECLS diff --git a/src/ostree/ot-main.c b/src/ostree/ot-main.c index fa4eb53f..d47a59ca 100644 --- a/src/ostree/ot-main.c +++ b/src/ostree/ot-main.c @@ -116,7 +116,7 @@ maybe_setup_mount_namespace (gboolean *out_ns, GError **error) *out_ns = FALSE; /* If we're not root, then we almost certainly can't be remounting anything */ - if (getuid () != 0) + if (!ot_util_process_privileged ()) return TRUE; /* If the system isn't booted via libostree, also nothing to do */ @@ 
-580,7 +580,7 @@ ostree_admin_sysroot_load (OstreeSysroot *sysroot, OstreeAdminBuiltinFlags flags /* Only require root if we're manipulating a booted sysroot. (Mostly * useful for the test suite) */ - if (booted && getuid () != 0) + if (booted && !ot_util_process_privileged ()) { g_set_error (error, G_IO_ERROR, G_IO_ERROR_PERMISSION_DENIED, "You must be root to perform this command"); From 83438a10e6b575ca881c4f2104d323f48e1f47e0 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 00:34:16 +0800 Subject: [PATCH 09/27] sysroot: Assert not in root mount namespace --- src/libostree/ostree-sysroot-private.h | 5 ++++ src/libostree/ostree-sysroot.c | 33 ++++++++++++++++++-------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 596cb8bc..35cc8535 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -69,6 +69,11 @@ struct OstreeSysroot GLnxLockFile lock; OstreeSysrootLoadState loadstate; + /* + * XXX: It's very bad that mount namespaces are per thread, not per process. + * In a multi-threading environment, it's troublesome to ensure current thread is always in the ns. + * So, do not use OstreeSysroot from another thread if you want mount namespace. 
+ */ gboolean mount_namespace_in_use; /* TRUE if caller has told us they used CLONE_NEWNS */ gboolean root_is_ostree_booted; /* TRUE if sysroot is / and we are booted via ostree */ /* The device/inode for / and /etc, used to detect booted deployment */ diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 0e8fc07c..6380fa53 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -227,6 +227,23 @@ ostree_sysroot_new_default (void) return ostree_sysroot_new (NULL); } +static gboolean +is_in_root_mount_namespace (GCancellable *cancellable, GError **error) +{ + g_autofree char *mntns_pid1 + = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error); + if (!mntns_pid1) + return glnx_prefix_error (error, "Reading /proc/1/ns/mnt"); + /* mount namespace is per-thread, not per-process */ + g_autofree char *cur_thread = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + g_autofree char *mntns_cur + = glnx_readlinkat_malloc (AT_FDCWD, cur_thread, cancellable, error); + if (!mntns_cur) + return glnx_prefix_error (error, "Reading %s", cur_thread); + + return g_str_equal (mntns_pid1, mntns_cur); +} + /** * ostree_sysroot_set_mount_namespace_in_use: * @@ -251,6 +268,9 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self) /* Must be before we're loaded, as otherwise we'd have to close/reopen all our fds, e.g. 
the repo */ g_return_if_fail (self->loadstate < OSTREE_SYSROOT_LOAD_STATE_LOADED); + g_autoptr (GError) local_error = NULL; + g_assert (!is_in_root_mount_namespace (NULL, &local_error)); + g_assert (local_error == NULL); self->mount_namespace_in_use = TRUE; } @@ -270,17 +290,8 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancel if (!self->root_is_ostree_booted) return TRUE; - g_autofree char *mntns_pid1 - = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error); - if (!mntns_pid1) - return glnx_prefix_error (error, "Reading /proc/1/ns/mnt"); - g_autofree char *mntns_self - = glnx_readlinkat_malloc (AT_FDCWD, "/proc/self/ns/mnt", cancellable, error); - if (!mntns_self) - return glnx_prefix_error (error, "Reading /proc/self/ns/mnt"); - // If the mount namespaces are the same, we need to unshare(). - if (strcmp (mntns_pid1, mntns_self) == 0) + if (is_in_root_mount_namespace (cancellable, error)) { if (unshare (CLONE_NEWNS) < 0) return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)"); @@ -289,6 +300,8 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancel if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0) return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private"); } + else + g_return_val_if_fail (error == NULL || *error == NULL, FALSE); ostree_sysroot_set_mount_namespace_in_use (self); From bf10b40387def453efd4104ce97beaaa81dcbc46 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 03:02:27 +0800 Subject: [PATCH 10/27] sysroot: Do not trust mount_namespace_in_use as it's per-thread --- src/libostree/ostree-sysroot.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 6380fa53..04184a07 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -278,10 +278,6 @@ gboolean _ostree_sysroot_enter_mount_namespace 
(OstreeSysroot *self, GCancellable *cancellable, GError **error) { - /* Do nothing if we're already in mount namespace */ - if (self->mount_namespace_in_use) - return TRUE; - /* Do nothing if we're not privileged */ if (!ot_util_process_privileged ()) return TRUE; From fb8a0cec65fe6456b6ad6d1b91d7cf435f2658b6 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 03:18:15 +0800 Subject: [PATCH 11/27] sysroot: Fix _ostree_sysroot_ensure_visible ns --- src/libostree/ostree-sysroot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 04184a07..51e441b2 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -437,7 +437,8 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) if (sysroot_ns_fd < 0) return FALSE; - glnx_autofd int cur_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + glnx_autofd int cur_ns_fd = open(cur_ns, O_RDONLY); if (cur_ns_fd < 0) return FALSE; From 52d03d762ee43a2fcaad0782a7744739f5ae8362 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 13:58:40 +0800 Subject: [PATCH 12/27] prepare-root: Use sysroot.readonly=invisible --- src/switchroot/ostree-prepare-root.c | 32 ++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 781c7a39..6275aa6f 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -78,8 +78,8 @@ #include "otcore.h" #define SYSROOT_KEY "sysroot" -#define INVISIBLE_KEY "invisible" #define READONLY_KEY "readonly" +#define INVISIBLE_VALUE "invisible" /* This key configures the / mount in the deployment root */ #define ROOT_KEY "root" @@ -317,9 +317,15 @@ main (int argc, char *argv[]) &error)) return FALSE; - if (!ot_keyfile_get_boolean_with_default 
(config, SYSROOT_KEY, INVISIBLE_KEY, FALSE, &sysroot_invisible, + { + g_autofree char *value = NULL; + if (!ot_keyfile_get_value_with_default (config, SYSROOT_KEY, READONLY_KEY, NULL, &value, &error)) - return FALSE; + return FALSE; + + if (value && g_str_equal (value, INVISIBLE_VALUE)) + sysroot_invisible = TRUE; + } // We always parse the composefs config, because we want to detect and error // out if it's enabled, but not supported at compile time. @@ -328,12 +334,20 @@ main (int argc, char *argv[]) if (!composefs_config) errx (EXIT_FAILURE, "%s", error->message); - // If composefs is enabled, that also implies sysroot.readonly=true because it's - // the new default we want to use (not because it's actually required) - const bool sysroot_readonly_default = composefs_config->enabled == OT_TRISTATE_YES; - if (!ot_keyfile_get_boolean_with_default (config, SYSROOT_KEY, READONLY_KEY, - sysroot_readonly_default, &sysroot_readonly, &error)) - errx (EXIT_FAILURE, "Failed to parse sysroot.readonly value: %s", error->message); + if (sysroot_invisible) + { + // sysroot_invisible implies sysroot_readonly + sysroot_readonly = TRUE; + } + else + { + // If composefs is enabled, that also implies sysroot.readonly=true because it's + // the new default we want to use (not because it's actually required) + const bool sysroot_readonly_default = composefs_config->enabled == OT_TRISTATE_YES; + if (!ot_keyfile_get_boolean_with_default (config, SYSROOT_KEY, READONLY_KEY, + sysroot_readonly_default, &sysroot_readonly, &error)) + errx (EXIT_FAILURE, "Failed to parse sysroot.readonly value: %s", error->message); + } /* This is the final target where we should prepare the rootfs. The usual * case with systemd in the initramfs is that root_mountpoint = "/sysroot". 
From a5c64da05e5bbe4fc5c1888475dc8d02fb5e46c6 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 14:17:35 +0800 Subject: [PATCH 13/27] prepare-root: Add sysroot.protect as alias of sysroot.readonly --- src/switchroot/ostree-prepare-root.c | 54 +++++++++++++++------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 6275aa6f..172ef57f 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -79,6 +79,7 @@ #define SYSROOT_KEY "sysroot" #define READONLY_KEY "readonly" +#define PROTECT_KEY "protect" #define INVISIBLE_VALUE "invisible" /* This key configures the / mount in the deployment root */ @@ -317,16 +318,6 @@ main (int argc, char *argv[]) &error)) return FALSE; - { - g_autofree char *value = NULL; - if (!ot_keyfile_get_value_with_default (config, SYSROOT_KEY, READONLY_KEY, NULL, &value, - &error)) - return FALSE; - - if (value && g_str_equal (value, INVISIBLE_VALUE)) - sysroot_invisible = TRUE; - } - // We always parse the composefs config, because we want to detect and error // out if it's enabled, but not supported at compile time. 
g_autoptr (ComposefsConfig) composefs_config @@ -334,20 +325,35 @@ main (int argc, char *argv[]) if (!composefs_config) errx (EXIT_FAILURE, "%s", error->message); - if (sysroot_invisible) - { - // sysroot_invisible implies sysroot_readonly - sysroot_readonly = TRUE; - } - else - { - // If composefs is enabled, that also implies sysroot.readonly=true because it's - // the new default we want to use (not because it's actually required) - const bool sysroot_readonly_default = composefs_config->enabled == OT_TRISTATE_YES; - if (!ot_keyfile_get_boolean_with_default (config, SYSROOT_KEY, READONLY_KEY, - sysroot_readonly_default, &sysroot_readonly, &error)) - errx (EXIT_FAILURE, "Failed to parse sysroot.readonly value: %s", error->message); - } + // If composefs is enabled, that also implies sysroot.readonly=true because it's + // the new default we want to use (not because it's actually required) + sysroot_readonly = composefs_config->enabled == OT_TRISTATE_YES; + { + const char *keys[] = {PROTECT_KEY, READONLY_KEY}; + g_autofree char *value = NULL; + for (int i = 0; i < 2; i++) + { + if (!ot_keyfile_get_value_with_default (config, SYSROOT_KEY, keys[i], NULL, &value, + &error)) + errx (EXIT_FAILURE, "%s", error->message); + + if (value) + { + if (g_str_equal (value, INVISIBLE_VALUE)) + { + sysroot_invisible = TRUE; + // sysroot_invisible implies sysroot_readonly + sysroot_readonly = TRUE; + } + else + { + if (!_ostree_parse_boolean (value, &sysroot_readonly, &error)) + errx (EXIT_FAILURE, "%s", error->message); + } + break; + } + } + } /* This is the final target where we should prepare the rootfs. The usual * case with systemd in the initramfs is that root_mountpoint = "/sysroot". 
From 1d4dc03de8b052f7aaf99d3508f553325398b92e Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 14:32:48 +0800 Subject: [PATCH 14/27] prepare-root: Make leftover /sysroot immutable --- Makefile-switchroot.am | 3 ++- src/switchroot/ostree-prepare-root.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index 9a1d4137..a1d70f7d 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -50,7 +50,8 @@ CLEANFILES += ostree-prepare-root else ostree_boot_PROGRAMS += ostree-prepare-root ostree_prepare_root_CFLAGS += $(AM_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libostree -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -ostree_prepare_root_SOURCES += src/switchroot/ostree-prepare-root.c +ostree_prepare_root_SOURCES += src/switchroot/ostree-prepare-root.c \ + src/libostree/ostree-linuxfsutil.c ostree_prepare_root_CPPFLAGS += $(OT_INTERNAL_GIO_UNIX_CFLAGS) $(OT_DEP_CRYPTO_CFLAGS) -I $(srcdir)/libglnx ostree_prepare_root_LDADD += $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) $(OT_DEP_CRYPTO_LIBS) libotcore.la libotutil.la libglnx.la endif # BUILDOPT_USE_STATIC_COMPILER diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 172ef57f..9659adb3 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -98,6 +98,7 @@ #include #endif +#include "ostree-linuxfsutil.h" #include "ostree-mount-util.h" static bool @@ -775,6 +776,22 @@ main (int argc, char *argv[]) /* Unmount /sysroot */ if (umount2 ("sysroot", MNT_DETACH) < 0) err (EXIT_FAILURE, "failed to unmount /sysroot"); + + /* Attempt to make the leftover empty /sysroot immutable. + * This is to prevent accidental modification when root.transient is enabled. 
+ */ + do + { + g_autoptr (GError) local_error = NULL; + glnx_autofd int fd = -1; + if (!glnx_opendirat (AT_FDCWD, "sysroot", TRUE, &fd, &local_error)) + err (EXIT_FAILURE, "failed to open /sysroot"); + /* It's funny that we need to first touch it to move it to upper layer */ + if (futimens (fd, NULL) < 0) + break; + if (!_ostree_linuxfs_fd_alter_immutable_flag (fd, TRUE, NULL, &local_error)) + break; + } while (FALSE); } else if (sysroot_readonly) { From 6626f51e13f7aa531f7d6072432348524d144436 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 14:38:11 +0800 Subject: [PATCH 15/27] prepare-root: Add some comments --- src/switchroot/ostree-prepare-root.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 9659adb3..62458a46 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -742,11 +742,20 @@ main (int argc, char *argv[]) const gsize stack_size = 0x8000; g_autofree void *stack = g_malloc (stack_size); - /* Block signals */ + /* Block signals. + * This is necessary to deliver SIGUSR1 to finish the child process in a deterministic way. + * If we do not block signals here, kill(SIGUSR1) may accidentally kill the child + * before it has set up a signal mask. + */ sigset_t oldset, newset; sigfillset (&newset); sigprocmask (SIG_SETMASK, &newset, &oldset); + /* We use clone() instead of fork() + setns() here, + * so that the child process is created with a new mount namespace, + * and in parent we can bind mount the new mount namespace immediately + * without race condition. 
+ */ int pid = clone (invisible_helper, (char*)stack + stack_size, CLONE_VM | CLONE_NEWNS | SIGCHLD, NULL); sigprocmask (SIG_SETMASK, &oldset, NULL); From 3dc167e2eea5cd271c6814157c4d7555286093fc Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 14:42:50 +0800 Subject: [PATCH 16/27] prepare-root: Fail if sysroot-ns already exists --- src/switchroot/ostree-prepare-root.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 62458a46..81cea5f6 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -735,7 +735,7 @@ main (int argc, char *argv[]) * and unmount sysroot in the root mount namespace to make it invisible. */ const char *sysroot_ns = OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns"; - glnx_autofd int ns_fd = open (sysroot_ns, O_WRONLY | O_CREAT, 0); + glnx_autofd int ns_fd = open (sysroot_ns, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0); if (ns_fd < 0) err (EXIT_FAILURE, "failed to create %s", sysroot_ns); From 300d21c1cbd327594d7bc0f15c7fa7f26e4cc8c2 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 15:04:55 +0800 Subject: [PATCH 17/27] sysroot: _ostree_sysroot_invisible & _ostree_in_root_mount_namespace --- src/libostree/ostree-sysroot-private.h | 3 +- src/libostree/ostree-sysroot.c | 58 +++++++++++++++++--------- src/libotutil/ot-fs-utils.c | 23 ++++++++++ src/libotutil/ot-fs-utils.h | 2 + 4 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 35cc8535..4cf8414a 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -124,8 +124,7 @@ gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error); gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error); gboolean -_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, 
GCancellable *cancellable, - GError **error); +_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error); void _ostree_sysroot_emit_journal_msg (OstreeSysroot *self, const char *msg); diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 51e441b2..bdd1e583 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -228,20 +228,22 @@ ostree_sysroot_new_default (void) } static gboolean -is_in_root_mount_namespace (GCancellable *cancellable, GError **error) +_ostree_in_root_mount_namespace (gboolean *out_val, GError **error) { + /* glnx_readlinkat_malloc does not use cancellable actually. */ g_autofree char *mntns_pid1 - = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error); + = glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", NULL, error); if (!mntns_pid1) return glnx_prefix_error (error, "Reading /proc/1/ns/mnt"); /* mount namespace is per-thread, not per-process */ g_autofree char *cur_thread = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); g_autofree char *mntns_cur - = glnx_readlinkat_malloc (AT_FDCWD, cur_thread, cancellable, error); + = glnx_readlinkat_malloc (AT_FDCWD, cur_thread, NULL, error); if (!mntns_cur) return glnx_prefix_error (error, "Reading %s", cur_thread); - return g_str_equal (mntns_pid1, mntns_cur); + *out_val = g_str_equal (mntns_pid1, mntns_cur); + return TRUE; } /** @@ -268,15 +270,14 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self) /* Must be before we're loaded, as otherwise we'd have to close/reopen all our fds, e.g.
the repo */ g_return_if_fail (self->loadstate < OSTREE_SYSROOT_LOAD_STATE_LOADED); + gboolean in_root; g_autoptr (GError) local_error = NULL; - g_assert (!is_in_root_mount_namespace (NULL, &local_error)); - g_assert (local_error == NULL); + g_assert (_ostree_in_root_mount_namespace (&in_root, &local_error) && !in_root); self->mount_namespace_in_use = TRUE; } gboolean -_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancellable, - GError **error) +_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error) { /* Do nothing if we're not privileged */ if (!ot_util_process_privileged ()) @@ -287,7 +288,9 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancel return TRUE; // If the mount namespaces are the same, we need to unshare(). - if (is_in_root_mount_namespace (cancellable, error)) + gboolean in_root; + g_return_val_if_fail (_ostree_in_root_mount_namespace (&in_root, error), FALSE); + if (in_root) { if (unshare (CLONE_NEWNS) < 0) return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)"); @@ -296,8 +299,6 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GCancellable *cancel if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0) return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private"); } - else - g_return_val_if_fail (error == NULL || *error == NULL, FALSE); ostree_sysroot_set_mount_namespace_in_use (self); @@ -334,7 +335,7 @@ ostree_sysroot_initialize_with_mount_namespace (OstreeSysroot *self, GCancellabl if (!ostree_sysroot_initialize (self, error)) return FALSE; - return _ostree_sysroot_enter_mount_namespace (self, cancellable, error); + return _ostree_sysroot_enter_mount_namespace (self, error); } /** @@ -400,20 +401,33 @@ remount_writable (const char *path, gboolean *did_remount, GError **error) } static gboolean -is_sysroot_invisible (void) +_ostree_sysroot_invisible (gboolean *out_val, GError **error) { - 
struct stat stbuf; + gboolean exists; - if (lstat (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &stbuf) < 0) + if (!ot_path_exists (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) return FALSE; - if (lstat ("/sysroot/ostree", &stbuf) == 0) + if (!exists) + { + *out_val = FALSE; + return TRUE; + } + + if (!ot_path_exists ("/sysroot/ostree", &exists, error)) return FALSE; + if (exists) + { + *out_val = FALSE; + return TRUE; + } + + *out_val = TRUE; return TRUE; } -/* Unmount covering tmpfs to make /sysroot visible */ +/* Make /sysroot visible */ gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) { @@ -430,8 +444,10 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) if (!self->root_is_ostree_booted) return TRUE; + gboolean invisible; + g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); /* Handle invisible sysroot */ - if (is_sysroot_invisible ()) + if (invisible) { glnx_autofd int sysroot_ns_fd = open (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", O_RDONLY); if (sysroot_ns_fd < 0) @@ -1158,9 +1174,11 @@ ostree_sysroot_initialize (OstreeSysroot *self, GError **error) return TRUE; } - if (is_sysroot_invisible ()) + gboolean invisible; + g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); + if (invisible) { - if (!_ostree_sysroot_enter_mount_namespace (self, NULL, error)) + if (!_ostree_sysroot_enter_mount_namespace (self, error)) return FALSE; if (!_ostree_sysroot_ensure_visible (self, error)) return FALSE; diff --git a/src/libotutil/ot-fs-utils.c b/src/libotutil/ot-fs-utils.c index 1e961a98..31097a5b 100644 --- a/src/libotutil/ot-fs-utils.c +++ b/src/libotutil/ot-fs-utils.c @@ -277,3 +277,26 @@ ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size return TRUE; } + +/* Check whether a path exists */ +gboolean +ot_path_exists (const char *path, gboolean *out_val, GError **error) +{ + g_autoptr (GError) local_error = NULL; + + struct stat stbuf; + 
if (glnx_fstatat (AT_FDCWD, path, &stbuf, 0, &local_error)) + { + *out_val = TRUE; + return TRUE; + } + + if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND)) + { + *out_val = FALSE; + return TRUE; + } + + g_propagate_error (error, local_error); + return FALSE; +} diff --git a/src/libotutil/ot-fs-utils.h b/src/libotutil/ot-fs-utils.h index 7df79ba2..67198817 100644 --- a/src/libotutil/ot-fs-utils.h +++ b/src/libotutil/ot-fs-utils.h @@ -78,4 +78,6 @@ gboolean ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, gboolean ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, GCancellable *cancellable, GError **error); +gboolean ot_path_exists (const char *path, gboolean *out_val, GError **error); + G_END_DECLS From 646cc34a3f54ae7a8f7e9a5ee30281ead8e7aaac Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 15:14:41 +0800 Subject: [PATCH 18/27] sysroot: rework _ostree_sysroot_ensure_visible --- src/libostree/ostree-sysroot.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index bdd1e583..0a8aa914 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -449,27 +449,27 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) /* Handle invisible sysroot */ if (invisible) { - glnx_autofd int sysroot_ns_fd = open (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", O_RDONLY); - if (sysroot_ns_fd < 0) + glnx_autofd int sysroot_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error)) return FALSE; g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); - glnx_autofd int cur_ns_fd = open(cur_ns, O_RDONLY); - if (cur_ns_fd < 0) + glnx_autofd int cur_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) return FALSE; if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) - 
return FALSE; + return glnx_throw_errno_prefix (error, "setns"); - glnx_autofd int tree_fd = open_tree (AT_FDCWD, "/", OPEN_TREE_CLONE); + glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC); if (tree_fd < 0) - return FALSE; + return glnx_throw_errno_prefix (error, "open_tree"); if (setns (cur_ns_fd, CLONE_NEWNS) < 0) abort (); // it's unsafe to continue if we cannot switch back - if (move_mount (tree_fd, "", AT_FDCWD, "/sysroot", MOVE_MOUNT_F_EMPTY_PATH) < 0) - return FALSE; + if (syscall (SYS_move_mount, tree_fd, "", AT_FDCWD, "/sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) + return glnx_throw_errno_prefix (error, "move_mount"); } /* Now close and reopen our file descriptors */ From 96e2fb8717eadf187c8f7c8b749ff7578bce8392 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 02:43:40 +0800 Subject: [PATCH 19/27] sysroot: Do not require enter mount namespace for ensure_visible and ensure_writable --- src/libostree/ostree-sysroot-private.h | 11 -- src/libostree/ostree-sysroot.c | 220 ++++++++++++++++--------- src/libotutil/ot-fs-utils.c | 4 +- src/libotutil/ot-fs-utils.h | 2 +- src/ostree/ot-admin-builtin-nsenter.c | 4 +- src/ostree/ot-main.c | 11 +- src/ostree/ot-main.h | 1 + 7 files changed, 160 insertions(+), 93 deletions(-) diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h index 4cf8414a..bc241684 100644 --- a/src/libostree/ostree-sysroot-private.h +++ b/src/libostree/ostree-sysroot-private.h @@ -69,12 +69,6 @@ struct OstreeSysroot GLnxLockFile lock; OstreeSysrootLoadState loadstate; - /* - * XXX: It's very bad that mount namespaces are per thread, not per process. - * In a multi-threading environment, it's troublesome to ensure current thread is always in the ns. - * So, do not use OstreeSysroot from another thread if you want mount namespace. 
- */ - gboolean mount_namespace_in_use; /* TRUE if caller has told us they used CLONE_NEWNS */ gboolean root_is_ostree_booted; /* TRUE if sysroot is / and we are booted via ostree */ /* The device/inode for / and /etc, used to detect booted deployment */ dev_t root_device; @@ -119,13 +113,8 @@ struct OstreeSysroot // Relative to /boot, consumed by ostree-boot-complete.service #define _OSTREE_FINALIZE_STAGED_FAILURE_PATH "ostree/finalize-failure.stamp" -gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error); - gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error); -gboolean -_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error); - void _ostree_sysroot_emit_journal_msg (OstreeSysroot *self, const char *msg); gboolean _ostree_sysroot_read_boot_loader_configs (OstreeSysroot *self, int bootversion, diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 0a8aa914..8723a44b 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -262,6 +262,8 @@ _ostree_in_root_mount_namespace (gboolean *out_val, GError **error) * If you invoke this function, it must be before ostree_sysroot_load(); it may * be invoked before or after ostree_sysroot_initialize(). * + * This function is now a stub.
+ * * Since: 2020.1 */ void @@ -273,23 +275,57 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self) gboolean in_root; g_autoptr (GError) local_error = NULL; g_assert (_ostree_in_root_mount_namespace (&in_root, &local_error) && !in_root); - self->mount_namespace_in_use = TRUE; } +static gboolean +ensure_sysroot_fd (OstreeSysroot *self, GError **error); + gboolean +_ostree_sysroot_ensure_boot_fd (OstreeSysroot *self, GError **error); + +static gboolean +_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error); + +static gboolean _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error) { - /* Do nothing if we're not privileged */ - if (!ot_util_process_privileged ()) - return TRUE; - /* We also assume operating on non-booted roots won't have a readonly sysroot */ if (!self->root_is_ostree_booted) return TRUE; - // If the mount namespaces are the same, we need to unshare(). gboolean in_root; - g_return_val_if_fail (_ostree_in_root_mount_namespace (&in_root, error), FALSE); + if (!_ostree_in_root_mount_namespace (&in_root, error)) + return FALSE; + + /* Backup tree fd of sysroot_fd and boot_fd */ + glnx_autofd int sysroot_tree_fd = -1; + if ((sysroot_tree_fd = (int)syscall (SYS_open_tree, self->sysroot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0) + { + if (errno == EINVAL) + { + /* This means sysroot_fd is already a fd obtained by open_tree */ + sysroot_tree_fd = g_steal_fd (&self->sysroot_fd); + } + else + return glnx_throw_errno_prefix (error, "open_tree"); + } + + glnx_autofd int boot_tree_fd = -1; + if (self->boot_fd >= 0) + { + if ((boot_tree_fd = (int)syscall (SYS_open_tree, self->boot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0) + { + if (errno == EINVAL) + { + /* This means boot_fd is already a fd obtained by open_tree */ + boot_tree_fd = g_steal_fd (&self->boot_fd); + } + else + return glnx_throw_errno_prefix (error, "open_tree"); + } + } + + // If the 
mount namespaces are the same, we need to unshare(). if (in_root) { if (unshare (CLONE_NEWNS) < 0) @@ -300,7 +336,42 @@ _ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error) return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private"); } - ostree_sysroot_set_mount_namespace_in_use (self); + /* Mount sysroot and boot back */ + ostree_sysroot_unload (self); + if (!ensure_sysroot_fd (self, error)) + return FALSE; + + gboolean invisible; + if (!_ostree_sysroot_invisible (self, &invisible, error)) + return FALSE; + + if (invisible) + { + glnx_autofd int old_sysroot_fd = g_steal_fd (&self->sysroot_fd); + + if (syscall (SYS_move_mount, sysroot_tree_fd, "", old_sysroot_fd, "sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) + return glnx_throw_errno_prefix (error, "move_mount"); + + if (!glnx_opendirat (old_sysroot_fd, "sysroot", TRUE, &self->sysroot_fd, error)) + return FALSE; + + if (boot_tree_fd >= 0) + { + if (syscall (SYS_move_mount, boot_tree_fd, "", old_sysroot_fd, "boot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) + return glnx_throw_errno_prefix (error, "move_mount"); + + if (!glnx_opendirat (old_sysroot_fd, "boot", TRUE, &self->boot_fd, error)) + return FALSE; + } + } + else + { + if (boot_tree_fd >= 0) + { + if (!_ostree_sysroot_ensure_boot_fd (self, error)) + return FALSE; + } + } return TRUE; } @@ -401,23 +472,27 @@ remount_writable (const char *path, gboolean *did_remount, GError **error) } static gboolean -_ostree_sysroot_invisible (gboolean *out_val, GError **error) +_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error) { gboolean exists; - if (!ot_path_exists (OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) + g_assert (self->sysroot_fd >= 0); + g_assert (self->root_is_ostree_booted); + + if (!ot_path_exists (self->sysroot_fd, "sysroot/ostree", &exists, error)) return FALSE; - if (!exists) + if (exists) { *out_val = FALSE; return TRUE; } - if (!ot_path_exists 
("/sysroot/ostree", &exists, error)) + // root_is_ostree_booted is true so we can use AT_FDCWD here + if (!ot_path_exists (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) return FALSE; - if (exists) + if (!exists) { *out_val = FALSE; return TRUE; @@ -428,55 +503,47 @@ _ostree_sysroot_invisible (gboolean *out_val, GError **error) } /* Make /sysroot visible */ -gboolean +static gboolean _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) { - if (!ostree_sysroot_initialize (self, error)) + gboolean invisible; + if (!_ostree_sysroot_invisible (self, &invisible, error)) return FALSE; - /* Do nothing if no mount namespace is in use */ - if (!self->mount_namespace_in_use) + if (!invisible) return TRUE; - /* If we aren't operating on a booted system, then we don't - * do anything with mounts. + /* Boot may reside on the original sysroot. + * To prevent from losing it, try ensuring it now. */ - if (!self->root_is_ostree_booted) - return TRUE; - - gboolean invisible; - g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); - /* Handle invisible sysroot */ - if (invisible) + if (!_ostree_sysroot_ensure_boot_fd (self, error)) { - glnx_autofd int sysroot_ns_fd = -1; - if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error)) - return FALSE; - - g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); - glnx_autofd int cur_ns_fd = -1; - if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) - return FALSE; - - if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) - return glnx_throw_errno_prefix (error, "setns"); - - glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC); - if (tree_fd < 0) - return glnx_throw_errno_prefix (error, "open_tree"); - - if (setns (cur_ns_fd, CLONE_NEWNS) < 0) - abort (); // it's unsafe to continue if we cannot switch back - - if (syscall (SYS_move_mount, tree_fd, "", 
AT_FDCWD, "/sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0) - return glnx_throw_errno_prefix (error, "move_mount"); + // ignore failure } - /* Now close and reopen our file descriptors */ - ostree_sysroot_unload (self); - if (!ensure_sysroot_fd (self, error)) + glnx_autofd int sysroot_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error)) return FALSE; + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + glnx_autofd int cur_ns_fd = -1; + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) + return FALSE; + + /* Because namespace is per-thread, there is no race here */ + if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); + + glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC); + if (tree_fd < 0) + return glnx_throw_errno_prefix (error, "open_tree"); + + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); + + glnx_close_fd (&self->sysroot_fd); + self->sysroot_fd = g_steal_fd (&tree_fd); + return TRUE; } @@ -484,13 +551,9 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) gboolean _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) { - if (!_ostree_sysroot_ensure_visible (self, error)) + if (!ostree_sysroot_initialize (self, error)) return FALSE; - /* Do nothing if no mount namespace is in use */ - if (!self->mount_namespace_in_use) - return TRUE; - /* If we aren't operating on a booted system, then we don't * do anything with mounts. 
*/ @@ -501,20 +564,34 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; - gboolean did_remount_sysroot = FALSE; - if (!remount_writable ("/sysroot", &did_remount_sysroot, error)) - return FALSE; - gboolean did_remount_boot = FALSE; - if (!remount_writable ("/boot", &did_remount_boot, error)) + glnx_autofd int cur_ns_fd = -1; + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) + return FALSE; + + if (!_ostree_sysroot_enter_mount_namespace (self, error)) return FALSE; - /* Now close and reopen our file descriptors */ ostree_sysroot_unload (self); + + const char *path = gs_file_get_path_cached (self->path); + g_autofree char *sysroot_path = g_strdup_printf ("%s/sysroot", path); + gboolean did_remount_sysroot = FALSE; + if (!remount_writable (sysroot_path, &did_remount_sysroot, error)) + return FALSE; + g_autofree char *boot_path = g_strdup_printf ("%s/boot", path); + gboolean did_remount_boot = FALSE; + if (!remount_writable (boot_path, &did_remount_boot, error)) + return FALSE; + if (!ensure_sysroot_fd (self, error)) return FALSE; if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); + return TRUE; } @@ -1167,21 +1244,14 @@ ostree_sysroot_initialize (OstreeSysroot *self, GError **error) self->root_is_ostree_booted = (ostree_booted && root_is_sysroot); g_debug ("root_is_ostree_booted: %d", self->root_is_ostree_booted); - self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT; - } - else - { - return TRUE; - } - gboolean invisible; - g_return_val_if_fail (_ostree_sysroot_invisible (&invisible, error), FALSE); - if (invisible) - { - if (!_ostree_sysroot_enter_mount_namespace (self, error)) - return FALSE; - if (!_ostree_sysroot_ensure_visible (self, error)) - return FALSE; + if 
(self->root_is_ostree_booted) + { + if (!_ostree_sysroot_ensure_visible (self, error)) + return FALSE; + } + + self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT; } return TRUE; diff --git a/src/libotutil/ot-fs-utils.c b/src/libotutil/ot-fs-utils.c index 31097a5b..f986f8d7 100644 --- a/src/libotutil/ot-fs-utils.c +++ b/src/libotutil/ot-fs-utils.c @@ -280,12 +280,12 @@ ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size /* Check whether a path exists */ gboolean -ot_path_exists (const char *path, gboolean *out_val, GError **error) +ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error) { g_autoptr (GError) local_error = NULL; struct stat stbuf; - if (glnx_fstatat (AT_FDCWD, path, &stbuf, 0, &local_error)) + if (glnx_fstatat (dfd, path, &stbuf, 0, &local_error)) { *out_val = TRUE; return TRUE; diff --git a/src/libotutil/ot-fs-utils.h b/src/libotutil/ot-fs-utils.h index 67198817..52f0f621 100644 --- a/src/libotutil/ot-fs-utils.h +++ b/src/libotutil/ot-fs-utils.h @@ -78,6 +78,6 @@ gboolean ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, gboolean ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, GCancellable *cancellable, GError **error); -gboolean ot_path_exists (const char *path, gboolean *out_val, GError **error); +gboolean ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error); G_END_DECLS diff --git a/src/ostree/ot-admin-builtin-nsenter.c b/src/ostree/ot-admin-builtin-nsenter.c index fab7964e..bf6a1477 100644 --- a/src/ostree/ot-admin-builtin-nsenter.c +++ b/src/ostree/ot-admin-builtin-nsenter.c @@ -65,8 +65,8 @@ ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invoca } if (!ostree_admin_option_context_parse (context, options, &argc, &argv, - OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED, invocation, &sysroot, - cancellable, error)) + OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED | OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS, + invocation, 
&sysroot, cancellable, error)) return FALSE; argc = new_argc; diff --git a/src/ostree/ot-main.c b/src/ostree/ot-main.c index d47a59ca..e69b4f74 100644 --- a/src/ostree/ot-main.c +++ b/src/ostree/ot-main.c @@ -559,12 +559,19 @@ gboolean ostree_admin_sysroot_load (OstreeSysroot *sysroot, OstreeAdminBuiltinFlags flags, GCancellable *cancellable, GError **error) { - if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0) + if (flags & OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS) { - /* Set up the mount namespace, if applicable */ if (!ostree_sysroot_initialize_with_mount_namespace (sysroot, cancellable, error)) return FALSE; + } + else + { + if (!ostree_sysroot_initialize (sysroot, error)) + return FALSE; + } + if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0) + { /* Released when sysroot is finalized, or on process exit */ if (!ot_admin_sysroot_lock (sysroot, error)) return FALSE; diff --git a/src/ostree/ot-main.h b/src/ostree/ot-main.h index 8df1ca8e..e4cbc242 100644 --- a/src/ostree/ot-main.h +++ b/src/ostree/ot-main.h @@ -39,6 +39,7 @@ typedef enum OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED = (1 << 1), OSTREE_ADMIN_BUILTIN_FLAG_NO_SYSROOT = (1 << 2), OSTREE_ADMIN_BUILTIN_FLAG_NO_LOAD = (1 << 3), + OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS = (1 << 4), } OstreeAdminBuiltinFlags; typedef struct OstreeCommandInvocation OstreeCommandInvocation; From 01de3ea1e284df9d51df98c445db6ffa44a89215 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 02:46:00 +0800 Subject: [PATCH 20/27] nsenter: Fix argument parsing --- src/ostree/ot-admin-builtin-nsenter.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ostree/ot-admin-builtin-nsenter.c b/src/ostree/ot-admin-builtin-nsenter.c index bf6a1477..a6859d67 100644 --- a/src/ostree/ot-admin-builtin-nsenter.c +++ b/src/ostree/ot-admin-builtin-nsenter.c @@ -49,14 +49,14 @@ ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invoca context = g_option_context_new ("[PROGRAM 
[ARGUMENTS...]]"); - int new_argc = argc; - char **new_argv = argv; + int new_argc = 0; + char **new_argv = NULL; for (int i = 1; i < argc; i++) { if (g_str_equal (argv[i], "--")) { - new_argc -= i; + new_argc = argc - i; argc = i; new_argv = argv + i; argv[i] = NULL; @@ -69,8 +69,11 @@ ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invoca invocation, &sysroot, cancellable, error)) return FALSE; - argc = new_argc; - argv = new_argv; + if (new_argv) + { + argc = new_argc; + argv = new_argv; + } if (argc <= 1) { arguments = g_malloc_n (2, sizeof (char *)); From 627b4f88dff523b788b260adb8eeeb1a7905a083 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 03:05:24 +0800 Subject: [PATCH 21/27] prepare-root: Tidy code for root_upperdir and root_workdir --- src/switchroot/ostree-prepare-root.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 81cea5f6..dbce3d72 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -444,16 +444,16 @@ main (int argc, char *argv[]) g_autofree char *expected_digest = NULL; - // For now we just stick the transient root on the default /run tmpfs; - // however, see - // https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36 - // which bumps up these defaults for the rootfs a bit. - const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper"; - const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work"; - // Propagate these options for transient root, if provided if (root_transient) { + // For now we just stick the transient root on the default /run tmpfs; + // however, see + // https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36 + // which bumps up these defaults for the rootfs a bit. 
+ const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper"; + const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work"; + if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_upperdir, 0755, NULL, &error)) errx (EXIT_FAILURE, "Failed to create %s: %s", root_upperdir, error->message); if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_workdir, 0700, NULL, &error)) From c7caee90938f56e78d882466ecc30abdeb729fc2 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 03:12:21 +0800 Subject: [PATCH 22/27] sysroot: Use glnx_fstatat_allow_noent, drop ot_path_exists --- src/libostree/ostree-sysroot.c | 10 ++++------ src/libotutil/ot-fs-utils.c | 23 ----------------------- src/libotutil/ot-fs-utils.h | 2 -- 3 files changed, 4 insertions(+), 31 deletions(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 8723a44b..27ecdb61 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -474,25 +474,23 @@ remount_writable (const char *path, gboolean *did_remount, GError **error) static gboolean _ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error) { - gboolean exists; - g_assert (self->sysroot_fd >= 0); g_assert (self->root_is_ostree_booted); - if (!ot_path_exists (self->sysroot_fd, "sysroot/ostree", &exists, error)) + if (!glnx_fstatat_allow_noent (self->sysroot_fd, "sysroot/ostree", NULL, 0, error)) return FALSE; - if (exists) + if (errno == 0) { *out_val = FALSE; return TRUE; } // root_is_ostree_booted is true so we can use AT_FDCWD here - if (!ot_path_exists (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", &exists, error)) + if (!glnx_fstatat_allow_noent (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", NULL, 0, error)) return FALSE; - if (!exists) + if (errno != 0) { *out_val = FALSE; return TRUE; diff --git a/src/libotutil/ot-fs-utils.c b/src/libotutil/ot-fs-utils.c index f986f8d7..1e961a98 100644 --- a/src/libotutil/ot-fs-utils.c +++ 
b/src/libotutil/ot-fs-utils.c @@ -277,26 +277,3 @@ ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size return TRUE; } - -/* Check whether a path exists */ -gboolean -ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error) -{ - g_autoptr (GError) local_error = NULL; - - struct stat stbuf; - if (glnx_fstatat (dfd, path, &stbuf, 0, &local_error)) - { - *out_val = TRUE; - return TRUE; - } - - if (g_error_matches (local_error, G_IO_ERROR, G_IO_ERROR_NOT_FOUND)) - { - *out_val = FALSE; - return TRUE; - } - - g_propagate_error (error, local_error); - return FALSE; -} diff --git a/src/libotutil/ot-fs-utils.h b/src/libotutil/ot-fs-utils.h index 52f0f621..7df79ba2 100644 --- a/src/libotutil/ot-fs-utils.h +++ b/src/libotutil/ot-fs-utils.h @@ -78,6 +78,4 @@ gboolean ot_parse_file_by_line (const char *path, gboolean (*cb) (const char *, gboolean ot_get_dir_size (int dfd, const char *path, guint64 blocksize, guint64 *out_size, GCancellable *cancellable, GError **error); -gboolean ot_path_exists (int dfd, const char *path, gboolean *out_val, GError **error); - G_END_DECLS From 124035b36b19b9018a99dabd659da83fd9647b40 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 03:14:36 +0800 Subject: [PATCH 23/27] Revert "prepare-root: Make leftover /sysroot immutable" This reverts commit 1d4dc03de8b052f7aaf99d3508f553325398b92e. 
--- Makefile-switchroot.am | 3 +-- src/switchroot/ostree-prepare-root.c | 17 ----------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index a1d70f7d..9a1d4137 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -50,8 +50,7 @@ CLEANFILES += ostree-prepare-root else ostree_boot_PROGRAMS += ostree-prepare-root ostree_prepare_root_CFLAGS += $(AM_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libostree -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -ostree_prepare_root_SOURCES += src/switchroot/ostree-prepare-root.c \ - src/libostree/ostree-linuxfsutil.c +ostree_prepare_root_SOURCES += src/switchroot/ostree-prepare-root.c ostree_prepare_root_CPPFLAGS += $(OT_INTERNAL_GIO_UNIX_CFLAGS) $(OT_DEP_CRYPTO_CFLAGS) -I $(srcdir)/libglnx ostree_prepare_root_LDADD += $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) $(OT_DEP_CRYPTO_LIBS) libotcore.la libotutil.la libglnx.la endif # BUILDOPT_USE_STATIC_COMPILER diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index dbce3d72..c9880edc 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -98,7 +98,6 @@ #include #endif -#include "ostree-linuxfsutil.h" #include "ostree-mount-util.h" static bool @@ -785,22 +784,6 @@ main (int argc, char *argv[]) /* Unmount /sysroot */ if (umount2 ("sysroot", MNT_DETACH) < 0) err (EXIT_FAILURE, "failed to unmount /sysroot"); - - /* Attempt to make the leftover empty /sysroot immutable. - * This is to prevent accidental modification when root.transient is enabled. 
- */ - do - { - g_autoptr (GError) local_error = NULL; - glnx_autofd int fd = -1; - if (!glnx_opendirat (AT_FDCWD, "sysroot", TRUE, &fd, &local_error)) - err (EXIT_FAILURE, "failed to open /sysroot"); - /* It's funny that we need to first touch it to move it to upper layer */ - if (futimens (fd, NULL) < 0) - break; - if (!_ostree_linuxfs_fd_alter_immutable_flag (fd, TRUE, NULL, &local_error)) - break; - } while (FALSE); } else if (sysroot_readonly) { From 2bd95ea8559d95c6f6bc373043b382d1f3904a6c Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 03:29:43 +0800 Subject: [PATCH 24/27] sysroot: Fix ostree in ostree nsenter --- src/libostree/ostree-sysroot.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 27ecdb61..98bfae43 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -529,6 +529,9 @@ _ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error) return FALSE; /* Because namespace is per-thread, there is no race here */ + if (unshare (CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "unshare"); + if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0) return glnx_throw_errno_prefix (error, "setns"); @@ -562,11 +565,18 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; - glnx_autofd int cur_ns_fd = -1; - g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); - if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) + gboolean in_root; + if (!_ostree_in_root_mount_namespace (&in_root, error)) return FALSE; + glnx_autofd int cur_ns_fd = -1; + if (in_root) + { + g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ()); + if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) + return FALSE; + } + if (!_ostree_sysroot_enter_mount_namespace (self, error)) 
return FALSE; @@ -587,8 +597,11 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; - if (setns (cur_ns_fd, CLONE_NEWNS) < 0) - return glnx_throw_errno_prefix (error, "setns"); + if (in_root) + { + if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + return glnx_throw_errno_prefix (error, "setns"); + } return TRUE; } From 04d36fafb11a981406fd7650d0f976502e9f2f53 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 14:14:34 +0800 Subject: [PATCH 25/27] sysroot: Use ostree/repo to check visibility --- src/libostree/ostree-sysroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 98bfae43..2fa821ce 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -477,7 +477,7 @@ _ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError g_assert (self->sysroot_fd >= 0); g_assert (self->root_is_ostree_booted); - if (!glnx_fstatat_allow_noent (self->sysroot_fd, "sysroot/ostree", NULL, 0, error)) + if (!glnx_fstatat_allow_noent (self->sysroot_fd, "ostree/repo", NULL, 0, error)) return FALSE; if (errno == 0) From 2960f0a304f48d0503fe94a40bccc25dc8ca89f2 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 14:31:22 +0800 Subject: [PATCH 26/27] sysroot: Tidy code for _ostree_sysroot_ensure_writable --- src/libostree/ostree-sysroot.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c index 2fa821ce..01597c9b 100644 --- a/src/libostree/ostree-sysroot.c +++ b/src/libostree/ostree-sysroot.c @@ -569,14 +569,6 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_in_root_mount_namespace (&in_root, error)) return FALSE; - glnx_autofd int cur_ns_fd = -1; - if (in_root) - { - g_autofree char *cur_ns = g_strdup_printf 
("/proc/%d/ns/mnt", gettid ()); - if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error)) - return FALSE; - } - if (!_ostree_sysroot_enter_mount_namespace (self, error)) return FALSE; @@ -597,9 +589,15 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error) if (!_ostree_sysroot_ensure_boot_fd (self, error)) return FALSE; + /* Switch back */ if (in_root) { - if (setns (cur_ns_fd, CLONE_NEWNS) < 0) + glnx_autofd int root_ns_fd = -1; + + if (!glnx_openat_rdonly (AT_FDCWD, "/proc/1/ns/mnt", TRUE, &root_ns_fd, error)) + return FALSE; + + if (setns (root_ns_fd, CLONE_NEWNS) < 0) return glnx_throw_errno_prefix (error, "setns"); } From e808a69067892628717c56d1db143f9e0ebe98e3 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 21 Dec 2024 14:51:47 +0800 Subject: [PATCH 27/27] finalize-staged: Add a comment --- src/ostree/ot-admin-builtin-finalize-staged.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ostree/ot-admin-builtin-finalize-staged.c b/src/ostree/ot-admin-builtin-finalize-staged.c index 2ae1674d..456802d9 100644 --- a/src/ostree/ot-admin-builtin-finalize-staged.c +++ b/src/ostree/ot-admin-builtin-finalize-staged.c @@ -64,6 +64,8 @@ ot_admin_builtin_finalize_staged (int argc, char **argv, OstreeCommandInvocation if (opt_hold) { + /* XXX: does this work with invisible sysroot? */ + /* Load the sysroot unlocked so that a separate namespace isn't * created. */ if (!ostree_admin_sysroot_load (