mirror of
https://github.com/ostreedev/ostree.git
synced 2024-12-21 13:34:34 +03:00
Compare commits
32 Commits
cd06b322c1
...
e808a69067
Author | SHA1 | Date | |
---|---|---|---|
|
e808a69067 | ||
|
2960f0a304 | ||
|
04d36fafb1 | ||
|
2bd95ea855 | ||
|
124035b36b | ||
|
c7caee9093 | ||
|
627b4f88df | ||
|
01de3ea1e2 | ||
|
96e2fb8717 | ||
|
646cc34a3f | ||
|
300d21c1cb | ||
|
3dc167e2ee | ||
|
6626f51e13 | ||
|
1d4dc03de8 | ||
|
a5c64da05e | ||
|
52d03d762e | ||
|
fb8a0cec65 | ||
|
bf10b40387 | ||
|
83438a10e6 | ||
|
c5c414a921 | ||
|
b7e46b4e54 | ||
|
a762caeb30 | ||
|
412ec41744 | ||
|
91dda713d7 | ||
|
cb03e4466e | ||
|
bfb84a3154 | ||
|
1db98c0c18 | ||
|
8aaea0c65d | ||
|
45ddf3b798 | ||
|
aca6f17ff8 | ||
|
66f5a77ae6 | ||
|
786b38c2cf |
@ -86,6 +86,7 @@ ostree_SOURCES += \
|
||||
src/ostree/ot-admin-builtin-upgrade.c \
|
||||
src/ostree/ot-admin-builtin-unlock.c \
|
||||
src/ostree/ot-admin-builtin-state-overlay.c \
|
||||
src/ostree/ot-admin-builtin-nsenter.c \
|
||||
src/ostree/ot-admin-builtins.h \
|
||||
src/ostree/ot-admin-instutil-builtin-selinux-ensure-labeled.c \
|
||||
src/ostree/ot-admin-instutil-builtin-set-kargs.c \
|
||||
|
@ -1,7 +1,7 @@
|
||||
AC_PREREQ([2.63])
|
||||
dnl To perform a release, follow the instructions in `docs/CONTRIBUTING.md`.
|
||||
m4_define([year_version], [2024])
|
||||
m4_define([release_version], [10])
|
||||
m4_define([release_version], [11])
|
||||
m4_define([package_version], [year_version.release_version])
|
||||
AC_INIT([libostree], [package_version], [walters@verbum.org])
|
||||
is_release_build=no
|
||||
|
@ -120,20 +120,25 @@ License along with this library. If not, see <https://www.gnu.org/licenses/>.
|
||||
<varlistentry>
|
||||
<term><varname>root.transient</varname></term>
|
||||
<listitem><para>A boolean value; the default is <literal>false</literal>.
|
||||
If this is set to <literal>true</literal>, then the <literal>/</literal> filesystem will be a writable <literal>overlayfs</literal>,
|
||||
with the upper directory being a hidden directory (in the underlying system root filesystem) that will persist across reboots by default.
|
||||
However, changes will <emphasis>be discarded</emphasis> on OS updates!
|
||||
Setting this flag to <literal>true</literal> requires composefs (See <literal>composefs.enabled</literal>).
|
||||
When enabled, the root mount point <literal>/</literal> will be an overlayfs whose contents will be stored
|
||||
in a tmpfs, and hence discarded on OS upgrade or reboot.
|
||||
</para>
|
||||
<para>
|
||||
Enabling this option can be very useful for cases such as packages (dpkg/rpm/etc) that write content into <literal>/opt</literal>,
|
||||
particularly where they expect the target to be writable at runtime. To make that work, ensure that your <literal>/opt</literal>
|
||||
directory is <emphasis>not</emphasis> a symlink to <literal>/var/opt</literal>, but is just an empty directory.
|
||||
</para>
|
||||
<para>
|
||||
Note the <literal>/usr</literal> mount point remains read-only by default. This option is independent of <literal>etc.transient</literal> and <literal>sysroot.readonly</literal>;
|
||||
This option is independent of <literal>etc.transient</literal> and <literal>sysroot.readonly</literal>;
|
||||
it is supported for example to have <literal>root.transient=true</literal> but <literal>etc.transient=false</literal> in which case changes to <literal>/etc</literal> continue
|
||||
to persist across updates, with the default OSTree 3-way merge applied.
|
||||
</para></listitem>
|
||||
Also related to persistence it is important to emphasize that <literal>/sysroot</literal> (the physical root filesystem) is still persistent
|
||||
by default; in-place OS upgrades can be applied.
|
||||
</para>
|
||||
<para>
|
||||
Enabling this option can make it significantly easier to adopt an image-based model in some circumstances.
|
||||
For example, if you have a configuration management system that is inspecting machine-specific state and
|
||||
e.g. dynamically installing packages or applying configuration, it can more easily be adapted to
|
||||
run on each boot, while still shifting a portion (or ideally most) image configuration to build time
|
||||
as part of the base image/commit.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><varname>composefs.enabled</varname></term>
|
||||
|
@ -432,7 +432,7 @@ _ostree_bootloader_zipl_post_bls_sync (OstreeBootloader *bootloader, int bootver
|
||||
// This can happen in a unit testing environment; at some point what we want to do here
|
||||
// is move all of the zipl logic to a systemd unit instead that's keyed off
|
||||
// ostree-finalize-staged.service.
|
||||
if (getuid () != 0)
|
||||
if (!ot_util_process_privileged ())
|
||||
return TRUE;
|
||||
|
||||
// If we're in a booted deployment, we don't need to spawn a container.
|
||||
|
@ -126,31 +126,32 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha
|
||||
#endif
|
||||
}
|
||||
|
||||
// Resolve symlink to return osname
|
||||
static gboolean
|
||||
_ostree_sysroot_parse_bootlink_aboot (const char *bootlink, char **out_osname, GError **error)
|
||||
write_unit_file (int dir_fd, const char *path, GCancellable *cancellable, GError **error, const char *fmt, ...)
|
||||
{
|
||||
static gsize regex_initialized;
|
||||
static GRegex *regex;
|
||||
g_autofree char *symlink_val = glnx_readlinkat_malloc (-1, bootlink, NULL, error);
|
||||
if (!symlink_val)
|
||||
return glnx_prefix_error (error, "Failed to read '%s' symlink", bootlink);
|
||||
|
||||
if (g_once_init_enter (®ex_initialized))
|
||||
{
|
||||
regex = g_regex_new ("^deploy/([^/]+)/", 0, 0, NULL);
|
||||
g_assert (regex);
|
||||
g_once_init_leave (®ex_initialized, 1);
|
||||
}
|
||||
|
||||
g_autoptr (GMatchInfo) match = NULL;
|
||||
if (!g_regex_match (regex, symlink_val, 0, &match))
|
||||
return glnx_throw (error,
|
||||
"Invalid aboot symlink in /ostree, expected symlink to resolve to "
|
||||
"deploy/OSNAME/... instead it resolves to '%s'",
|
||||
symlink_val);
|
||||
|
||||
*out_osname = g_match_info_fetch (match, 1);
|
||||
g_auto (GLnxTmpfile) tmpf = {
|
||||
0,
|
||||
};
|
||||
if (!glnx_open_tmpfile_linkable_at (dir_fd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
|
||||
return FALSE;
|
||||
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
|
||||
gsize bytes_written;
|
||||
va_list args;
|
||||
va_start (args, fmt);
|
||||
const gboolean r = g_output_stream_vprintf (outstream, &bytes_written, cancellable, error, fmt, args);
|
||||
va_end (args);
|
||||
if (!r)
|
||||
return FALSE;
|
||||
if (!g_output_stream_flush (outstream, cancellable, error))
|
||||
return FALSE;
|
||||
g_clear_object (&outstream);
|
||||
/* It should be readable */
|
||||
if (!glnx_fchmod (tmpf.fd, 0644, error))
|
||||
return FALSE;
|
||||
/* Error out if somehow it already exists, that'll help us debug conflicts */
|
||||
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, dir_fd, path,
|
||||
error))
|
||||
return FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -163,22 +164,37 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
|
||||
/* Not currently cancellable, but define a var in case we care later */
|
||||
GCancellable *cancellable = NULL;
|
||||
/* Some path constants to avoid typos */
|
||||
static const char fstab_path[] = "/etc/fstab";
|
||||
static const char var_path[] = "/var";
|
||||
const char *fstab_path = "/etc/fstab";
|
||||
const char *var_dst = "/var";
|
||||
const char *var_src = OTCORE_RUN_OSTREE_PRIVATE "/var";
|
||||
|
||||
/* Written by ostree-sysroot-deploy.c. We parse out the stateroot here since we
|
||||
* need to know it to mount /var. Unfortunately we can't easily use the
|
||||
* libostree API to find the booted deployment since /boot might not have been
|
||||
* mounted yet.
|
||||
/* Prepare to write to the output unit dir; we use the "normal" dir
|
||||
* that overrides /usr, but not /etc.
|
||||
*/
|
||||
g_autofree char *stateroot = NULL;
|
||||
if (is_aboot)
|
||||
{
|
||||
if (!_ostree_sysroot_parse_bootlink_aboot (ostree_target, &stateroot, error))
|
||||
return glnx_prefix_error (error, "Parsing aboot stateroot");
|
||||
}
|
||||
else if (!_ostree_sysroot_parse_bootlink (ostree_target, NULL, &stateroot, NULL, NULL, error))
|
||||
return glnx_prefix_error (error, "Parsing stateroot");
|
||||
glnx_autofd int normal_dir_dfd = -1;
|
||||
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
|
||||
return FALSE;
|
||||
|
||||
/* Generate a unit to unmount var_src */
|
||||
if (!write_unit_file (normal_dir_dfd, "ostree-unmount-temp-var.service", cancellable, error,
|
||||
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
|
||||
"[Unit]\n"
|
||||
"Documentation=man:ostree(1)\n"
|
||||
"ConditionPathIsMountPoint=%s\n"
|
||||
"After=var.mount\n"
|
||||
"\n"
|
||||
"[Service]\n"
|
||||
"Type=oneshot\n"
|
||||
"ExecStart=/usr/bin/umount --lazy %s\n",
|
||||
var_src, var_src))
|
||||
return FALSE;
|
||||
|
||||
if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "local-fs.target.wants", 0755, cancellable,
|
||||
error))
|
||||
return FALSE;
|
||||
if (symlinkat ("../ostree-unmount-temp-var.service", normal_dir_dfd,
|
||||
"local-fs.target.wants/ostree-unmount-temp-var.service") < 0)
|
||||
return glnx_throw_errno_prefix (error, "symlinkat");
|
||||
|
||||
/* Load /etc/fstab if it exists, and look for a /var mount */
|
||||
g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re");
|
||||
@ -199,7 +215,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
|
||||
path_kill_slashes (where);
|
||||
|
||||
/* We're only looking for /var here */
|
||||
if (strcmp (where, var_path) != 0)
|
||||
if (strcmp (where, var_dst) != 0)
|
||||
continue;
|
||||
|
||||
found_var_mnt = TRUE;
|
||||
@ -211,59 +227,19 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor
|
||||
if (found_var_mnt)
|
||||
return TRUE;
|
||||
|
||||
/* Prepare to write to the output unit dir; we use the "normal" dir
|
||||
* that overrides /usr, but not /etc.
|
||||
*/
|
||||
glnx_autofd int normal_dir_dfd = -1;
|
||||
if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error))
|
||||
return FALSE;
|
||||
|
||||
/* Generate our bind mount unit */
|
||||
const char *stateroot_var_path = glnx_strjoina ("/sysroot/ostree/deploy/", stateroot, "/var");
|
||||
|
||||
g_auto (GLnxTmpfile) tmpf = {
|
||||
0,
|
||||
};
|
||||
if (!glnx_open_tmpfile_linkable_at (normal_dir_dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
|
||||
return FALSE;
|
||||
g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE);
|
||||
gsize bytes_written;
|
||||
/* This code is inspired by systemd's fstab-generator.c.
|
||||
*
|
||||
* Note that our unit doesn't run if systemd.volatile is enabled;
|
||||
* see https://github.com/ostreedev/ostree/pull/856
|
||||
*
|
||||
* To avoid having submounts of /var propagate into $stateroot/var, the mount
|
||||
* is made with slave+shared propagation. This means that /var will receive
|
||||
* mount events from the parent /sysroot mount, but not vice versa. Adding a
|
||||
* shared peer group below the slave group means that submounts of /var will
|
||||
* inherit normal shared propagation. See mount_namespaces(7), Linux
|
||||
* Documentation/filesystems/sharedsubtree.txt and
|
||||
* https://github.com/ostreedev/ostree/issues/2086. This also happens in
|
||||
* ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case.
|
||||
*/
|
||||
if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error,
|
||||
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
|
||||
"[Unit]\n"
|
||||
"Documentation=man:ostree(1)\n"
|
||||
"ConditionKernelCommandLine=!systemd.volatile\n"
|
||||
"Before=local-fs.target\n"
|
||||
"\n"
|
||||
"[Mount]\n"
|
||||
"Where=%s\n"
|
||||
"What=%s\n"
|
||||
"Options=bind,slave,shared\n",
|
||||
var_path, stateroot_var_path))
|
||||
return FALSE;
|
||||
if (!g_output_stream_flush (outstream, cancellable, error))
|
||||
return FALSE;
|
||||
g_clear_object (&outstream);
|
||||
/* It should be readable */
|
||||
if (!glnx_fchmod (tmpf.fd, 0644, error))
|
||||
return FALSE;
|
||||
/* Error out if somehow it already exists, that'll help us debug conflicts */
|
||||
if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, normal_dir_dfd, "var.mount",
|
||||
error))
|
||||
if (!write_unit_file (normal_dir_dfd, "var.mount", cancellable, error,
|
||||
"##\n# Automatically generated by ostree-system-generator\n##\n\n"
|
||||
"[Unit]\n"
|
||||
"Documentation=man:ostree(1)\n"
|
||||
"ConditionKernelCommandLine=!systemd.volatile\n"
|
||||
"Before=local-fs.target\n"
|
||||
"\n"
|
||||
"[Mount]\n"
|
||||
"Where=%s\n"
|
||||
"What=%s\n"
|
||||
"Options=bind\n",
|
||||
var_dst, var_src))
|
||||
return FALSE;
|
||||
|
||||
/* And ensure it's required; newer systemd will auto-inject fs dependencies
|
||||
|
@ -1658,7 +1658,7 @@ ostree_repo_prepare_transaction (OstreeRepo *self, gboolean *out_transaction_res
|
||||
self->reserved_blocks = reserved_bytes / self->txn.blocksize;
|
||||
|
||||
/* Use the appropriate free block count if we're unprivileged */
|
||||
guint64 bfree = (getuid () != 0 ? stvfsbuf.f_bavail : stvfsbuf.f_bfree);
|
||||
guint64 bfree = (ot_util_process_privileged () ? stvfsbuf.f_bfree : stvfsbuf.f_bavail);
|
||||
if (bfree > self->reserved_blocks)
|
||||
self->txn.max_blocks = bfree - self->reserved_blocks;
|
||||
else
|
||||
|
@ -69,7 +69,6 @@ struct OstreeSysroot
|
||||
GLnxLockFile lock;
|
||||
|
||||
OstreeSysrootLoadState loadstate;
|
||||
gboolean mount_namespace_in_use; /* TRUE if caller has told us they used CLONE_NEWNS */
|
||||
gboolean root_is_ostree_booted; /* TRUE if sysroot is / and we are booted via ostree */
|
||||
/* The device/inode for / and /etc, used to detect booted deployment */
|
||||
dev_t root_device;
|
||||
|
@ -227,6 +227,25 @@ ostree_sysroot_new_default (void)
|
||||
return ostree_sysroot_new (NULL);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
_ostree_in_root_mount_namespace (gboolean *out_val, GError **error)
|
||||
{
|
||||
/* glnx_readlinkat_malloc does not use cancellable acually. */
|
||||
g_autofree char *mntns_pid1
|
||||
= glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", NULL, error);
|
||||
if (!mntns_pid1)
|
||||
return glnx_prefix_error (error, "Reading /proc/1/ns/mnt");
|
||||
/* mount namespace is per-thread, not per-process */
|
||||
g_autofree char *cur_thread = g_strdup_printf ("/proc/%d/ns/mnt", gettid ());
|
||||
g_autofree char *mntns_cur
|
||||
= glnx_readlinkat_malloc (AT_FDCWD, cur_thread, NULL, error);
|
||||
if (!mntns_cur)
|
||||
return glnx_prefix_error (error, "Reading %s", cur_thread);
|
||||
|
||||
*out_val = g_str_equal (mntns_pid1, mntns_cur);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* ostree_sysroot_set_mount_namespace_in_use:
|
||||
*
|
||||
@ -243,6 +262,8 @@ ostree_sysroot_new_default (void)
|
||||
* If you invoke this function, it must be before ostree_sysroot_load(); it may
|
||||
* be invoked before or after ostree_sysroot_initialize().
|
||||
*
|
||||
* This function is now a stub.
|
||||
*
|
||||
* Since: 2020.1
|
||||
*/
|
||||
void
|
||||
@ -251,7 +272,108 @@ ostree_sysroot_set_mount_namespace_in_use (OstreeSysroot *self)
|
||||
/* Must be before we're loaded, as otherwise we'd have to close/reopen all our
|
||||
fds, e.g. the repo */
|
||||
g_return_if_fail (self->loadstate < OSTREE_SYSROOT_LOAD_STATE_LOADED);
|
||||
self->mount_namespace_in_use = TRUE;
|
||||
gboolean in_root;
|
||||
g_autoptr (GError) local_error = NULL;
|
||||
g_assert (_ostree_in_root_mount_namespace (&in_root, &local_error) && !in_root);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
ensure_sysroot_fd (OstreeSysroot *self, GError **error);
|
||||
|
||||
gboolean
|
||||
_ostree_sysroot_ensure_boot_fd (OstreeSysroot *self, GError **error);
|
||||
|
||||
static gboolean
|
||||
_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error);
|
||||
|
||||
static gboolean
|
||||
_ostree_sysroot_enter_mount_namespace (OstreeSysroot *self, GError **error)
|
||||
{
|
||||
/* We also assume operating on non-booted roots won't have a readonly sysroot */
|
||||
if (!self->root_is_ostree_booted)
|
||||
return TRUE;
|
||||
|
||||
gboolean in_root;
|
||||
if (!_ostree_in_root_mount_namespace (&in_root, error))
|
||||
return FALSE;
|
||||
|
||||
/* Backup tree fd of sysroot_fd and boot_fd */
|
||||
glnx_autofd int sysroot_tree_fd = -1;
|
||||
if ((sysroot_tree_fd = (int)syscall (SYS_open_tree, self->sysroot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
{
|
||||
/* This means sysroot_fd is already a fd obtained by open_tree */
|
||||
sysroot_tree_fd = g_steal_fd (&self->sysroot_fd);
|
||||
}
|
||||
else
|
||||
return glnx_throw_errno_prefix (error, "open_tree");
|
||||
}
|
||||
|
||||
glnx_autofd int boot_tree_fd = -1;
|
||||
if (self->boot_fd >= 0)
|
||||
{
|
||||
if ((boot_tree_fd = (int)syscall (SYS_open_tree, self->boot_fd, "", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC | AT_EMPTY_PATH)) < 0)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
{
|
||||
/* This means boot_fd is already a fd obtained by open_tree */
|
||||
boot_tree_fd = g_steal_fd (&self->boot_fd);
|
||||
}
|
||||
else
|
||||
return glnx_throw_errno_prefix (error, "open_tree");
|
||||
}
|
||||
}
|
||||
|
||||
// If the mount namespaces are the same, we need to unshare().
|
||||
if (in_root)
|
||||
{
|
||||
if (unshare (CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)");
|
||||
|
||||
/* Ensure what we do in our mount namespace do not leak to outside */
|
||||
if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0)
|
||||
return glnx_throw_errno_prefix (error, "Failed to set the mount propagation to private");
|
||||
}
|
||||
|
||||
/* Mount sysroot and boot back */
|
||||
ostree_sysroot_unload (self);
|
||||
if (!ensure_sysroot_fd (self, error))
|
||||
return FALSE;
|
||||
|
||||
gboolean invisible;
|
||||
if (!_ostree_sysroot_invisible (self, &invisible, error))
|
||||
return FALSE;
|
||||
|
||||
if (invisible)
|
||||
{
|
||||
glnx_autofd int old_sysroot_fd = g_steal_fd (&self->sysroot_fd);
|
||||
|
||||
if (syscall (SYS_move_mount, sysroot_tree_fd, "", old_sysroot_fd, "sysroot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0)
|
||||
return glnx_throw_errno_prefix (error, "move_mount");
|
||||
|
||||
if (!glnx_opendirat (old_sysroot_fd, "sysroot", TRUE, &self->sysroot_fd, error))
|
||||
return FALSE;
|
||||
|
||||
if (boot_tree_fd >= 0)
|
||||
{
|
||||
if (syscall (SYS_move_mount, boot_tree_fd, "", old_sysroot_fd, "boot", 4 /* MOVE_MOUNT_F_EMPTY_PATH */) < 0)
|
||||
return glnx_throw_errno_prefix (error, "move_mount");
|
||||
|
||||
if (!glnx_opendirat (old_sysroot_fd, "boot", TRUE, &self->boot_fd, error))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (boot_tree_fd >= 0)
|
||||
{
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -284,32 +406,7 @@ ostree_sysroot_initialize_with_mount_namespace (OstreeSysroot *self, GCancellabl
|
||||
if (!ostree_sysroot_initialize (self, error))
|
||||
return FALSE;
|
||||
|
||||
/* Do nothing if we're not privileged */
|
||||
if (getuid () != 0)
|
||||
return TRUE;
|
||||
|
||||
/* We also assume operating on non-booted roots won't have a readonly sysroot */
|
||||
if (!self->root_is_ostree_booted)
|
||||
return TRUE;
|
||||
|
||||
g_autofree char *mntns_pid1
|
||||
= glnx_readlinkat_malloc (AT_FDCWD, "/proc/1/ns/mnt", cancellable, error);
|
||||
if (!mntns_pid1)
|
||||
return glnx_prefix_error (error, "Reading /proc/1/ns/mnt");
|
||||
g_autofree char *mntns_self
|
||||
= glnx_readlinkat_malloc (AT_FDCWD, "/proc/self/ns/mnt", cancellable, error);
|
||||
if (!mntns_self)
|
||||
return glnx_prefix_error (error, "Reading /proc/self/ns/mnt");
|
||||
|
||||
// If the mount namespaces are the same, we need to unshare().
|
||||
if (strcmp (mntns_pid1, mntns_self) == 0)
|
||||
{
|
||||
if (unshare (CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "Failed to invoke unshare(CLONE_NEWNS)");
|
||||
}
|
||||
|
||||
ostree_sysroot_set_mount_namespace_in_use (self);
|
||||
return TRUE;
|
||||
return _ostree_sysroot_enter_mount_namespace (self, error);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -374,6 +471,83 @@ remount_writable (const char *path, gboolean *did_remount, GError **error)
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
_ostree_sysroot_invisible (const OstreeSysroot *self, gboolean *out_val, GError **error)
|
||||
{
|
||||
g_assert (self->sysroot_fd >= 0);
|
||||
g_assert (self->root_is_ostree_booted);
|
||||
|
||||
if (!glnx_fstatat_allow_noent (self->sysroot_fd, "ostree/repo", NULL, 0, error))
|
||||
return FALSE;
|
||||
|
||||
if (errno == 0)
|
||||
{
|
||||
*out_val = FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// root_is_ostree_booted is true so we can use AT_FDCWD here
|
||||
if (!glnx_fstatat_allow_noent (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", NULL, 0, error))
|
||||
return FALSE;
|
||||
|
||||
if (errno != 0)
|
||||
{
|
||||
*out_val = FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
*out_val = TRUE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Make /sysroot visible */
|
||||
static gboolean
|
||||
_ostree_sysroot_ensure_visible (OstreeSysroot *self, GError **error)
|
||||
{
|
||||
gboolean invisible;
|
||||
if (!_ostree_sysroot_invisible (self, &invisible, error))
|
||||
return FALSE;
|
||||
|
||||
if (!invisible)
|
||||
return TRUE;
|
||||
|
||||
/* Boot may reside on the original sysroot.
|
||||
* To prevent from losing it, try ensuring it now.
|
||||
*/
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
{
|
||||
// ignore failure
|
||||
}
|
||||
|
||||
glnx_autofd int sysroot_ns_fd = -1;
|
||||
if (!glnx_openat_rdonly (AT_FDCWD, OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns", TRUE, &sysroot_ns_fd, error))
|
||||
return FALSE;
|
||||
|
||||
g_autofree char *cur_ns = g_strdup_printf ("/proc/%d/ns/mnt", gettid ());
|
||||
glnx_autofd int cur_ns_fd = -1;
|
||||
if (!glnx_openat_rdonly (AT_FDCWD, cur_ns, TRUE, &cur_ns_fd, error))
|
||||
return FALSE;
|
||||
|
||||
/* Because namespace is per-thread, there is no race here */
|
||||
if (unshare (CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "unshare");
|
||||
|
||||
if (setns (sysroot_ns_fd, CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "setns");
|
||||
|
||||
glnx_autofd int tree_fd = (int)syscall (SYS_open_tree, AT_FDCWD, "/", 1 /* OPEN_TREE_CLONE */ | O_CLOEXEC);
|
||||
if (tree_fd < 0)
|
||||
return glnx_throw_errno_prefix (error, "open_tree");
|
||||
|
||||
if (setns (cur_ns_fd, CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "setns");
|
||||
|
||||
glnx_close_fd (&self->sysroot_fd);
|
||||
self->sysroot_fd = g_steal_fd (&tree_fd);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Remount /sysroot read-write if necessary */
|
||||
gboolean
|
||||
_ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error)
|
||||
@ -381,10 +555,6 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error)
|
||||
if (!ostree_sysroot_initialize (self, error))
|
||||
return FALSE;
|
||||
|
||||
/* Do nothing if no mount namespace is in use */
|
||||
if (!self->mount_namespace_in_use)
|
||||
return TRUE;
|
||||
|
||||
/* If we aren't operating on a booted system, then we don't
|
||||
* do anything with mounts.
|
||||
*/
|
||||
@ -395,20 +565,42 @@ _ostree_sysroot_ensure_writable (OstreeSysroot *self, GError **error)
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
|
||||
gboolean did_remount_sysroot = FALSE;
|
||||
if (!remount_writable ("/sysroot", &did_remount_sysroot, error))
|
||||
return FALSE;
|
||||
gboolean did_remount_boot = FALSE;
|
||||
if (!remount_writable ("/boot", &did_remount_boot, error))
|
||||
gboolean in_root;
|
||||
if (!_ostree_in_root_mount_namespace (&in_root, error))
|
||||
return FALSE;
|
||||
|
||||
if (!_ostree_sysroot_enter_mount_namespace (self, error))
|
||||
return FALSE;
|
||||
|
||||
/* Now close and reopen our file descriptors */
|
||||
ostree_sysroot_unload (self);
|
||||
|
||||
const char *path = gs_file_get_path_cached (self->path);
|
||||
g_autofree char *sysroot_path = g_strdup_printf ("%s/sysroot", path);
|
||||
gboolean did_remount_sysroot = FALSE;
|
||||
if (!remount_writable (sysroot_path, &did_remount_sysroot, error))
|
||||
return FALSE;
|
||||
g_autofree char *boot_path = g_strdup_printf ("%s/boot", path);
|
||||
gboolean did_remount_boot = FALSE;
|
||||
if (!remount_writable (boot_path, &did_remount_boot, error))
|
||||
return FALSE;
|
||||
|
||||
if (!ensure_sysroot_fd (self, error))
|
||||
return FALSE;
|
||||
if (!_ostree_sysroot_ensure_boot_fd (self, error))
|
||||
return FALSE;
|
||||
|
||||
/* Switch back */
|
||||
if (in_root)
|
||||
{
|
||||
glnx_autofd int root_ns_fd = -1;
|
||||
|
||||
if (!glnx_openat_rdonly (AT_FDCWD, "/proc/1/ns/mnt", TRUE, &root_ns_fd, error))
|
||||
return FALSE;
|
||||
|
||||
if (setns (root_ns_fd, CLONE_NEWNS) < 0)
|
||||
return glnx_throw_errno_prefix (error, "setns");
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -1061,6 +1253,13 @@ ostree_sysroot_initialize (OstreeSysroot *self, GError **error)
|
||||
|
||||
self->root_is_ostree_booted = (ostree_booted && root_is_sysroot);
|
||||
g_debug ("root_is_ostree_booted: %d", self->root_is_ostree_booted);
|
||||
|
||||
if (self->root_is_ostree_booted)
|
||||
{
|
||||
if (!_ostree_sysroot_ensure_visible (self, error))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
self->loadstate = OSTREE_SYSROOT_LOAD_STATE_INIT;
|
||||
}
|
||||
|
||||
|
@ -101,6 +101,8 @@ ComposefsConfig *otcore_load_composefs_config (const char *cmdline, GKeyFile *co
|
||||
#define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed"
|
||||
// This key will be present if the root is transient
|
||||
#define OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT "root.transient"
|
||||
// This key will be present if the sysroot is made invisible
|
||||
#define OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE "sysroot-invisible"
|
||||
// This key will be present if the sysroot-ro flag was found
|
||||
#define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro"
|
||||
// Always holds the (device, inode) pair of the booted deployment
|
||||
|
@ -102,3 +102,10 @@ ot_util_path_split_validate (const char *path, GPtrArray **out_components, GErro
|
||||
ot_transfer_out_value (out_components, &ret_components);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* Check if current process is privileged */
|
||||
gboolean
|
||||
ot_util_process_privileged (void)
|
||||
{
|
||||
return geteuid() == 0;
|
||||
}
|
||||
|
@ -39,4 +39,6 @@ gboolean ot_util_filename_validate (const char *name, GError **error);
|
||||
|
||||
gboolean ot_util_path_split_validate (const char *path, GPtrArray **out_components, GError **error);
|
||||
|
||||
gboolean ot_util_process_privileged (void);
|
||||
|
||||
G_END_DECLS
|
||||
|
@ -64,6 +64,8 @@ ot_admin_builtin_finalize_staged (int argc, char **argv, OstreeCommandInvocation
|
||||
|
||||
if (opt_hold)
|
||||
{
|
||||
/* XXX: does this work with invisible sysroot? */
|
||||
|
||||
/* Load the sysroot unlocked so that a separate namespace isn't
|
||||
* created. */
|
||||
if (!ostree_admin_sysroot_load (
|
||||
|
129
src/ostree/ot-admin-builtin-nsenter.c
Normal file
129
src/ostree/ot-admin-builtin-nsenter.c
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Colin Walters <walters@verbum.org>
|
||||
*
|
||||
* SPDX-License-Identifier: LGPL-2.0+
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library. If not, see <https://www.gnu.org/licenses/>.
|
||||
*
|
||||
* Author: Misaki Kasumi <misakikasumi@outlook.com>
|
||||
*/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "libglnx.h"
|
||||
#include "ostree.h"
|
||||
#include "ot-admin-builtins.h"
|
||||
#include "ot-admin-functions.h"
|
||||
|
||||
#include <spawn.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
static gboolean opt_lock;
|
||||
static gboolean opt_exec;
|
||||
|
||||
static GOptionEntry options[] = {
|
||||
{ "lock", 0, 0, G_OPTION_ARG_NONE, &opt_lock,
|
||||
"Make /sysroot writable in the mount namespace and acquire an exclusive multi-process write lock", NULL },
|
||||
{ "exec", 0, 0, G_OPTION_ARG_NONE, &opt_exec,
|
||||
"Replace the process instead of spawning the program as child", NULL},
|
||||
{ NULL } };
|
||||
|
||||
gboolean
|
||||
ot_admin_builtin_nsenter (int argc, char **argv, OstreeCommandInvocation *invocation,
|
||||
GCancellable *cancellable, GError **error)
|
||||
{
|
||||
g_autoptr (GOptionContext) context = NULL;
|
||||
g_autoptr (OstreeSysroot) sysroot = NULL;
|
||||
g_autofree char **arguments = NULL;
|
||||
|
||||
context = g_option_context_new ("[PROGRAM [ARGUMENTS...]]");
|
||||
|
||||
int new_argc = 0;
|
||||
char **new_argv = NULL;
|
||||
|
||||
for (int i = 1; i < argc; i++)
|
||||
{
|
||||
if (g_str_equal (argv[i], "--"))
|
||||
{
|
||||
new_argc = argc - i;
|
||||
argc = i;
|
||||
new_argv = argv + i;
|
||||
argv[i] = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ostree_admin_option_context_parse (context, options, &argc, &argv,
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED | OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS,
|
||||
invocation, &sysroot, cancellable, error))
|
||||
return FALSE;
|
||||
|
||||
if (new_argv)
|
||||
{
|
||||
argc = new_argc;
|
||||
argv = new_argv;
|
||||
}
|
||||
if (argc <= 1)
|
||||
{
|
||||
arguments = g_malloc_n (2, sizeof (char *));
|
||||
if ((arguments[0] = getenv ("SHELL")) == NULL)
|
||||
arguments[0] = "/bin/sh";
|
||||
arguments[1] = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
arguments = g_malloc_n (argc, sizeof (char *));
|
||||
memcpy (arguments, argv + 1, (argc - 1) * sizeof (char *));
|
||||
arguments[argc - 1] = NULL;
|
||||
}
|
||||
|
||||
if (opt_lock)
|
||||
{
|
||||
if (opt_exec)
|
||||
return glnx_throw (error, "cannot specify both --lock and --exec");
|
||||
if (!ostree_sysroot_lock (sysroot, error))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
pid_t child_pid;
|
||||
if (opt_exec)
|
||||
{
|
||||
if (execvp (arguments[0], arguments) < 0)
|
||||
return glnx_throw_errno_prefix (error, "execvp");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (posix_spawnp (&child_pid, arguments[0], NULL, NULL, arguments, environ) != 0)
|
||||
return glnx_throw_errno_prefix (error, "posix_spawnp");
|
||||
}
|
||||
|
||||
int status;
|
||||
while (waitpid (child_pid, &status, 0) < 0)
|
||||
{
|
||||
if (errno != EINTR)
|
||||
return glnx_throw_errno_prefix (error, "waitpid");
|
||||
}
|
||||
|
||||
if (opt_lock)
|
||||
ostree_sysroot_unlock (sysroot);
|
||||
|
||||
if (!WIFEXITED (status))
|
||||
return glnx_throw (error, "child process killed by signal");
|
||||
|
||||
int exit_status = WEXITSTATUS (status);
|
||||
if (exit_status != EXIT_SUCCESS)
|
||||
exit (exit_status);
|
||||
|
||||
return TRUE;
|
||||
}
|
@ -51,6 +51,7 @@ BUILTINPROTO (kargs);
|
||||
BUILTINPROTO (post_copy);
|
||||
BUILTINPROTO (lock_finalization);
|
||||
BUILTINPROTO (state_overlay);
|
||||
BUILTINPROTO (nsenter);
|
||||
|
||||
#undef BUILTINPROTO
|
||||
|
||||
|
@ -70,6 +70,8 @@ static OstreeCommand admin_subcommands[] = {
|
||||
{ "upgrade", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_upgrade,
|
||||
"Construct new tree from current origin and deploy it, if it changed" },
|
||||
{ "kargs", OSTREE_BUILTIN_FLAG_NO_REPO, ot_admin_builtin_kargs, "Change kernel arguments" },
|
||||
{"nsenter", OSTREE_BUILTIN_FLAG_NO_REPO | OSTREE_BUILTIN_FLAG_HIDDEN, ot_admin_builtin_nsenter,
|
||||
"Run program in the mount namespace where /sysroot is present"},
|
||||
{ NULL, 0, NULL, NULL }
|
||||
};
|
||||
|
||||
@ -121,11 +123,6 @@ ostree_builtin_admin (int argc, char **argv, OstreeCommandInvocation *invocation
|
||||
}
|
||||
}
|
||||
|
||||
else if (g_str_equal (argv[in], "--"))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
argv[out] = argv[in];
|
||||
}
|
||||
|
||||
|
@ -116,7 +116,7 @@ maybe_setup_mount_namespace (gboolean *out_ns, GError **error)
|
||||
*out_ns = FALSE;
|
||||
|
||||
/* If we're not root, then we almost certainly can't be remounting anything */
|
||||
if (getuid () != 0)
|
||||
if (!ot_util_process_privileged ())
|
||||
return TRUE;
|
||||
|
||||
/* If the system isn't booted via libostree, also nothing to do */
|
||||
@ -559,12 +559,19 @@ gboolean
|
||||
ostree_admin_sysroot_load (OstreeSysroot *sysroot, OstreeAdminBuiltinFlags flags,
|
||||
GCancellable *cancellable, GError **error)
|
||||
{
|
||||
if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0)
|
||||
if (flags & OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS)
|
||||
{
|
||||
/* Set up the mount namespace, if applicable */
|
||||
if (!ostree_sysroot_initialize_with_mount_namespace (sysroot, cancellable, error))
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!ostree_sysroot_initialize (sysroot, error))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if ((flags & OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED) == 0)
|
||||
{
|
||||
/* Released when sysroot is finalized, or on process exit */
|
||||
if (!ot_admin_sysroot_lock (sysroot, error))
|
||||
return FALSE;
|
||||
@ -580,7 +587,7 @@ ostree_admin_sysroot_load (OstreeSysroot *sysroot, OstreeAdminBuiltinFlags flags
|
||||
/* Only require root if we're manipulating a booted sysroot. (Mostly
|
||||
* useful for the test suite)
|
||||
*/
|
||||
if (booted && getuid () != 0)
|
||||
if (booted && !ot_util_process_privileged ())
|
||||
{
|
||||
g_set_error (error, G_IO_ERROR, G_IO_ERROR_PERMISSION_DENIED,
|
||||
"You must be root to perform this command");
|
||||
|
@ -39,6 +39,7 @@ typedef enum
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_UNLOCKED = (1 << 1),
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_NO_SYSROOT = (1 << 2),
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_NO_LOAD = (1 << 3),
|
||||
OSTREE_ADMIN_BUILTIN_FLAG_ENTER_NS = (1 << 4),
|
||||
} OstreeAdminBuiltinFlags;
|
||||
|
||||
typedef struct OstreeCommandInvocation OstreeCommandInvocation;
|
||||
|
@ -68,6 +68,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <ostree-core.h>
|
||||
@ -78,6 +79,8 @@
|
||||
|
||||
#define SYSROOT_KEY "sysroot"
|
||||
#define READONLY_KEY "readonly"
|
||||
#define PROTECT_KEY "protect"
|
||||
#define INVISIBLE_VALUE "invisible"
|
||||
|
||||
/* This key configures the / mount in the deployment root */
|
||||
#define ROOT_KEY "root"
|
||||
@ -254,6 +257,33 @@ composefs_error_message (int errsv)
|
||||
|
||||
#endif
|
||||
|
||||
/* Child entry point for the clone() call in main() that implements the
 * "invisible" sysroot mode.
 *
 * The helper performs, in order:
 *   1. recursively marks every mount under "/" as private,
 *   2. changes directory to "sysroot" (relative to the current directory),
 *   3. bind-mounts that directory over "/" and chroot()s into it,
 *   4. blocks in sigwaitinfo() until SIGUSR1 arrives.
 *
 * The parent uses the time the helper spends in step 4 to bind-mount
 * /proc/<pid>/ns/mnt and then sends SIGUSR1 to let the helper exit.
 *
 * The argument exists only because clone() requires an `int (*)(void *)`
 * callback; it is unused.  It is named because leaving a parameter unnamed
 * in a function *definition* is only valid C starting with C23.
 *
 * NOTE(review): sigwaitinfo() only works reliably if SIGUSR1 is blocked on
 * entry; the caller in main() blocks all signals around clone(), which this
 * function relies on.
 *
 * Returns EXIT_SUCCESS after SIGUSR1 has been received, or EXIT_FAILURE as
 * soon as any of the steps above fails.
 */
static int
invisible_helper (void *data)
{
  (void)data;

  /* Make all mounts private so the changes below stay in this namespace. */
  if (mount (NULL, "/", NULL, MS_PRIVATE | MS_REC | MS_SILENT, NULL) < 0)
    return EXIT_FAILURE;

  if (chdir ("sysroot") < 0)
    return EXIT_FAILURE;

  /* Turn the sysroot directory into the root of this mount namespace. */
  if (mount (".", "/", NULL, MS_BIND | MS_SILENT, NULL) < 0)
    return EXIT_FAILURE;

  if (chroot (".") < 0)
    return EXIT_FAILURE;

  /* Park until the parent signals that it no longer needs us alive. */
  sigset_t sigset;
  sigemptyset (&sigset);
  sigaddset (&sigset, SIGUSR1);
  while (sigwaitinfo (&sigset, NULL) < 0)
    {
      /* Only an interrupted wait is retried; anything else is fatal. */
      if (errno != EINTR)
        return EXIT_FAILURE;
    }

  return EXIT_SUCCESS;
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
@ -280,8 +310,9 @@ main (int argc, char *argv[])
|
||||
if (!config)
|
||||
errx (EXIT_FAILURE, "Failed to parse config: %s", error->message);
|
||||
|
||||
gboolean sysroot_readonly = FALSE;
|
||||
gboolean root_transient = FALSE;
|
||||
gboolean sysroot_invisible = FALSE;
|
||||
gboolean sysroot_readonly = FALSE;
|
||||
|
||||
if (!ot_keyfile_get_boolean_with_default (config, ROOT_KEY, TRANSIENT_KEY, FALSE, &root_transient,
|
||||
&error))
|
||||
@ -296,10 +327,33 @@ main (int argc, char *argv[])
|
||||
|
||||
// If composefs is enabled, that also implies sysroot.readonly=true because it's
|
||||
// the new default we want to use (not because it's actually required)
|
||||
const bool sysroot_readonly_default = composefs_config->enabled == OT_TRISTATE_YES;
|
||||
if (!ot_keyfile_get_boolean_with_default (config, SYSROOT_KEY, READONLY_KEY,
|
||||
sysroot_readonly_default, &sysroot_readonly, &error))
|
||||
errx (EXIT_FAILURE, "Failed to parse sysroot.readonly value: %s", error->message);
|
||||
sysroot_readonly = composefs_config->enabled == OT_TRISTATE_YES;
|
||||
{
|
||||
const char *keys[] = {PROTECT_KEY, READONLY_KEY};
|
||||
g_autofree char *value = NULL;
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
if (!ot_keyfile_get_value_with_default (config, SYSROOT_KEY, keys[i], NULL, &value,
|
||||
&error))
|
||||
errx (EXIT_FAILURE, "%s", error->message);
|
||||
|
||||
if (value)
|
||||
{
|
||||
if (g_str_equal (value, INVISIBLE_VALUE))
|
||||
{
|
||||
sysroot_invisible = TRUE;
|
||||
// sysroot_invisible implies sysroot_readonly
|
||||
sysroot_readonly = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!_ostree_parse_boolean (value, &sysroot_readonly, &error))
|
||||
errx (EXIT_FAILURE, "%s", error->message);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* This is the final target where we should prepare the rootfs. The usual
|
||||
* case with systemd in the initramfs is that root_mountpoint = "/sysroot".
|
||||
@ -389,19 +443,16 @@ main (int argc, char *argv[])
|
||||
|
||||
g_autofree char *expected_digest = NULL;
|
||||
|
||||
// For now we just stick the transient root on the default /run tmpfs;
|
||||
// however, see
|
||||
// https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36
|
||||
// which bumps up these defaults for the rootfs a bit.
|
||||
g_autofree char *root_upperdir
|
||||
= root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/upper", NULL)
|
||||
: NULL;
|
||||
g_autofree char *root_workdir
|
||||
= root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/work", NULL) : NULL;
|
||||
|
||||
// Propagate these options for transient root, if provided
|
||||
if (root_transient)
|
||||
{
|
||||
// For now we just stick the transient root on the default /run tmpfs;
|
||||
// however, see
|
||||
// https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36
|
||||
// which bumps up these defaults for the rootfs a bit.
|
||||
const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper";
|
||||
const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work";
|
||||
|
||||
if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_upperdir, 0755, NULL, &error))
|
||||
errx (EXIT_FAILURE, "Failed to create %s: %s", root_upperdir, error->message);
|
||||
if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_workdir, 0700, NULL, &error))
|
||||
@ -506,7 +557,9 @@ main (int argc, char *argv[])
|
||||
g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT,
|
||||
g_variant_new_boolean (root_transient));
|
||||
|
||||
/* Pass on the state for use by ostree-prepare-root */
|
||||
g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE,
|
||||
g_variant_new_boolean (sysroot_invisible));
|
||||
|
||||
g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_SYSROOT_RO,
|
||||
g_variant_new_boolean (sysroot_readonly));
|
||||
|
||||
@ -611,17 +664,15 @@ main (int argc, char *argv[])
|
||||
err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr");
|
||||
}
|
||||
|
||||
/* Prepare /var.
|
||||
* When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself)
|
||||
* so that the stateroot location stays writable. */
|
||||
if (sysroot_readonly)
|
||||
{
|
||||
/* Bind-mount /var (at stateroot path), and remount as writable. */
|
||||
if (mount ("../../var", "../../var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", srcpath);
|
||||
if (mount ("../../var", "../../var", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", srcpath);
|
||||
}
|
||||
const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var";
|
||||
|
||||
/* Bind-mount /var, and remount as writable. */
|
||||
if (mkdirat (AT_FDCWD, var_dir, 0) < 0)
|
||||
err (EXIT_FAILURE, "failed to mkdir %s", var_dir);
|
||||
if (mount ("../../var", var_dir, NULL, MS_BIND | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", var_dir);
|
||||
if (mount (var_dir, var_dir, NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", var_dir);
|
||||
|
||||
/* When running under systemd, /var will be handled by a 'var.mount' unit outside
|
||||
* of initramfs.
|
||||
@ -640,18 +691,11 @@ main (int argc, char *argv[])
|
||||
*/
|
||||
if (mount_var)
|
||||
{
|
||||
if (mount ("../../var", TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to bind mount ../../var to var");
|
||||
if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to bind mount %s to /var", var_dir);
|
||||
|
||||
/* To avoid having submounts of /var propagate into $stateroot/var, the
|
||||
* mount is made with slave+shared propagation. See the comment in
|
||||
* ostree-impl-system-generator.c when /var isn't mounted in the
|
||||
* initramfs for further explanation.
|
||||
*/
|
||||
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SLAVE | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to change /var to slave mount");
|
||||
if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SHARED | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to change /var to slave+shared mount");
|
||||
if (umount2 (var_dir, MNT_DETACH) < 0)
|
||||
err (EXIT_FAILURE, "failed to umount %s", var_dir);
|
||||
}
|
||||
|
||||
/* This can be used by other things to signal ostree is in use */
|
||||
@ -684,12 +728,69 @@ main (int argc, char *argv[])
|
||||
if (rmdir (TMP_SYSROOT) < 0)
|
||||
err (EXIT_FAILURE, "couldn't remove temporary sysroot %s", TMP_SYSROOT);
|
||||
|
||||
/* Now that we've set up all the mount points, if configured we remount the physical
|
||||
* rootfs as read-only; what is visibly mutable to the OS by default is just /etc and /var.
|
||||
* But ostree knows how to mount /boot and /sysroot read-write to perform operations.
|
||||
*/
|
||||
if (sysroot_readonly)
|
||||
if (sysroot_invisible)
|
||||
{
|
||||
/* Keep a living sysroot in a private mount namespace,
|
||||
* and unmount sysroot in the root mount namespace to make it invisible.
|
||||
*/
|
||||
const char *sysroot_ns = OTCORE_RUN_OSTREE_PRIVATE "/sysroot-ns";
|
||||
glnx_autofd int ns_fd = open (sysroot_ns, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0);
|
||||
if (ns_fd < 0)
|
||||
err (EXIT_FAILURE, "failed to create %s", sysroot_ns);
|
||||
|
||||
const gsize stack_size = 0x8000;
|
||||
g_autofree void *stack = g_malloc (stack_size);
|
||||
|
||||
/* Block signals.
|
||||
* This is necessary to deliver SIGUSR1 to finish the child process in a deterministic way.
|
||||
* If we do not block signals here, kill(SIGUSR1) may accidentally kill the child
|
||||
* before it has set up a signal mask.
|
||||
*/
|
||||
sigset_t oldset, newset;
|
||||
sigfillset (&newset);
|
||||
sigprocmask (SIG_SETMASK, &newset, &oldset);
|
||||
|
||||
/* We use clone() instead of fork() + setns() here,
|
||||
* so that the child process is created with a new mount namespace,
|
||||
* and in parent we can bind mount the new mount namespace immediately
|
||||
* without race condition.
|
||||
*/
|
||||
int pid = clone (invisible_helper, (char*)stack + stack_size, CLONE_VM | CLONE_NEWNS | SIGCHLD, NULL);
|
||||
|
||||
sigprocmask (SIG_SETMASK, &oldset, NULL);
|
||||
|
||||
if (pid < 0)
|
||||
err (EXIT_FAILURE, "failed to create child process");
|
||||
|
||||
/* Bind mount the private mount namespace */
|
||||
g_autofree char *ns = g_strdup_printf ("/proc/%d/ns/mnt", pid);
|
||||
if (mount (ns, sysroot_ns, NULL, MS_BIND | MS_SILENT, NULL) < 0)
|
||||
err (EXIT_FAILURE, "failed to bind-mount sysroot-ns");
|
||||
|
||||
/* Finish child process */
|
||||
kill (pid, SIGUSR1);
|
||||
|
||||
/* Wait for the child process to exit. */
|
||||
int status;
|
||||
while (waitpid (pid, &status, 0) < 0)
|
||||
{
|
||||
if (errno != EINTR)
|
||||
err (EXIT_FAILURE, "waitpid failed");
|
||||
}
|
||||
|
||||
if (!WIFEXITED (status) || WEXITSTATUS (status) != EXIT_SUCCESS)
|
||||
err (EXIT_FAILURE, "child exited abnormally");
|
||||
|
||||
/* Unmount /sysroot */
|
||||
if (umount2 ("sysroot", MNT_DETACH) < 0)
|
||||
err (EXIT_FAILURE, "failed to unmount /sysroot");
|
||||
}
|
||||
else if (sysroot_readonly)
|
||||
{
|
||||
/* Now that we've set up all the mount points, if configured we remount the physical
|
||||
* rootfs as read-only; what is visibly mutable to the OS by default is just /etc and /var.
|
||||
* But ostree knows how to mount /boot and /sysroot read-write to perform operations.
|
||||
*/
|
||||
if (mount ("sysroot", "sysroot", NULL, MS_BIND | MS_REMOUNT | MS_RDONLY | MS_SILENT, NULL)
|
||||
< 0)
|
||||
err (EXIT_FAILURE, "failed to make /sysroot read-only");
|
||||
|
@ -225,13 +225,19 @@ main (int argc, char *argv[])
|
||||
exit (EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/* Handle remounting /sysroot; if it's explicitly marked as read-only (opt in)
|
||||
/* Handle remounting /sysroot;
|
||||
* If it's made invisible, do nothing.
|
||||
* if it's explicitly marked as read-only (opt in)
|
||||
* then ensure it's readonly, otherwise mount writable, the same as /
|
||||
*/
|
||||
gboolean sysroot_configured_invisible = FALSE;
|
||||
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_SYSROOT_INVISIBLE, "b",
|
||||
&sysroot_configured_invisible);
|
||||
gboolean sysroot_configured_readonly = FALSE;
|
||||
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_SYSROOT_RO, "b",
|
||||
&sysroot_configured_readonly);
|
||||
do_remount ("/sysroot", !sysroot_configured_readonly);
|
||||
if (!sysroot_configured_invisible)
|
||||
do_remount ("/sysroot", !sysroot_configured_readonly);
|
||||
|
||||
/* And also make sure to make /etc rw again. We make this conditional on
|
||||
* sysroot_configured_readonly && !transient_etc because only in that case is it a
|
||||
|
Loading…
Reference in New Issue
Block a user