1
0
mirror of https://github.com/systemd/systemd.git synced 2024-12-22 17:35:35 +03:00

mount-util: use mount beneath to replace previous namespace mount

Instead of mounting over, do an atomic swap using mount beneath, if
available. This way assets can be mounted again and again (e.g.:
updates) without leaking mounts.
This commit is contained in:
Luca Boccassi 2023-09-29 01:50:15 +01:00 committed by Luca Boccassi
parent f5e6f3117c
commit 7c83d42ef8
9 changed files with 109 additions and 41 deletions

View File

@ -1310,11 +1310,16 @@ node /org/freedesktop/systemd1 {
<function>TryRestartUnit()</function> or <function>ReloadOrTryRestartUnit()</function> for the marked
units.</para>
<para><function>BindMountUnit()</function> can be used to bind mount new files or directories into
a running service mount namespace.</para>
<para><function>BindMountUnit()</function> can be used to bind mount new files or directories into a
running service mount namespace. If supported by the kernel, any prior mount on the selected target
will be replaced by the new mount. If not supported, any prior mount will be over-mounted, but remain
pinned and inaccessible.
</para>
<para><function>MountImageUnit()</function> can be used to mount new images into a running service
mount namespace.</para>
mount namespace. If supported by the kernel, any prior mount on the selected target will be replaced
by the new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
inaccessible.</para>
<para><function>KillUnit()</function> may be used to kill (i.e. send a signal to) all processes of a
unit. It takes the unit <varname>name</varname>, an enum <varname>who</varname> and a UNIX

View File

@ -663,6 +663,10 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
</para>
<para>If supported by the kernel, any prior mount on the selected target will be replaced by the
new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
inaccessible.</para>
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
</varlistentry>
@ -693,6 +697,10 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
</para>
<para>If supported by the kernel, any prior mount on the selected target will be replaced by the
new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
inaccessible.</para>
<para>Example:
<programlisting>systemctl mount-image foo.service /tmp/img.raw /var/lib/image root:ro,nosuid</programlisting>
<programlisting>systemctl mount-image --mkdir bar.service /tmp/img.raw /var/lib/baz/img</programlisting>

View File

@ -539,6 +539,10 @@ static inline int missing_open_tree(
/* ======================================================================= */
/* Flag for move_mount() asking that the new mount be attached beneath the mount currently on top of
 * the target, rather than on top of it. Only defined here if the included headers did not already
 * provide it. */
#ifndef MOVE_MOUNT_BENEATH
#define MOVE_MOUNT_BENEATH 0x00000200
#endif
#if !HAVE_MOVE_MOUNT
#ifndef MOVE_MOUNT_F_EMPTY_PATH

View File

@ -2007,8 +2007,12 @@ static int mount_partition(
if (m->fsmount_fd >= 0) {
/* Case #1: Attach existing fsmount fd to the file system */
if (move_mount(m->fsmount_fd, "", -EBADF, p, MOVE_MOUNT_F_EMPTY_PATH) < 0)
return -errno;
r = mount_exchange_graceful(
m->fsmount_fd,
p,
FLAGS_SET(flags, DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE));
if (r < 0)
return log_debug_errno(r, "Failed to mount image on '%s': %m", p);
} else {
assert(node);

View File

@ -86,6 +86,7 @@ typedef enum DissectImageFlags {
DISSECT_IMAGE_RELAX_EXTENSION_CHECK = 1 << 22, /* Don't insist that the extension-release file name matches the image name */
DISSECT_IMAGE_DISKSEQ_DEVNODE = 1 << 23, /* Prefer /dev/disk/by-diskseq/… device nodes */
DISSECT_IMAGE_ALLOW_EMPTY = 1 << 24, /* Allow that no usable partitions is present */
DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE = 1 << 25, /* Try to mount the image beneath the specified mountpoint, rather than on top of it, and then umount the top */
} DissectImageFlags;
struct DissectedImage {

View File

@ -730,6 +730,45 @@ int umount_verbose(
return 0;
}
/* Attaches the detached mount referenced by 'fsmount_fd' to the path 'dest'.
 *
 * If 'mount_beneath' is true, an atomic replacement of whatever is currently mounted on 'dest' is
 * attempted first; if that attempt fails with -EINVAL a regular over-mount is done instead. If
 * 'mount_beneath' is false, a regular over-mount is done right away.
 *
 * Returns a non-negative value on success, or a negative errno-style error code on failure (either from
 * move_mount() or from unmounting the previous top mount). */
int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath) {
        int r;

        assert(fsmount_fd >= 0);
        assert(dest);

        /* First, try to mount beneath an existing mount point, and if that works, umount the old mount,
         * which is now at the top. This will ensure we can atomically replace a mount. Note that this works
         * also in the case where there are submounts down the tree. Mount propagation is allowed but
         * restricted to layouts that don't end up propagating the new mount on top of the mount stack. If
         * this is not supported (minimum kernel v6.5), or if there is no mount on the mountpoint, we get
         * -EINVAL and then we fall back to normal mounting. */

        if (mount_beneath) {
                r = RET_NERRNO(move_mount(
                                fsmount_fd,
                                /* from_path= */ "",
                                /* to_fd= */ -EBADF,
                                dest,
                                MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_BENEATH));
                if (r >= 0) /* The new mount is in place beneath, now remove the old mount on top */
                        return umount_verbose(LOG_DEBUG, dest, UMOUNT_NOFOLLOW|MNT_DETACH);
                if (r != -EINVAL) /* Any other failure is final, do not retry */
                        return r;

                /* Mounting beneath is not possible here, fall through to the plain over-mount below */
                log_debug_errno(r,
                                "Failed to mount beneath '%s', falling back to overmount: %m",
                                dest);
        }

        return RET_NERRNO(move_mount(
                        fsmount_fd,
                        /* from_path= */ "",
                        /* to_fd= */ -EBADF,
                        dest,
                        MOVE_MOUNT_F_EMPTY_PATH));
}
int mount_option_mangle(
const char *options,
unsigned long mount_flags,
@ -1155,7 +1194,7 @@ static int mount_in_namespace(
(void) mkdir_parents(dest, 0755);
if (img) {
DissectImageFlags f = 0;
DissectImageFlags f = DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE;
if (make_file_or_directory)
f |= DISSECT_IMAGE_MKDIR;
@ -1174,11 +1213,7 @@ static int mount_in_namespace(
if (make_file_or_directory)
(void) make_mount_point_inode_from_stat(&st, dest, 0700);
r = RET_NERRNO(move_mount(new_mount_fd,
"",
-EBADF,
dest,
MOVE_MOUNT_F_EMPTY_PATH));
r = mount_exchange_graceful(new_mount_fd, dest, /* mount_beneath= */ true);
}
if (r < 0) {
(void) write(errno_pipe_fd[1], &r, sizeof(r));

View File

@ -68,6 +68,8 @@ int umount_verbose(
const char *where,
int flags);
int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath);
int mount_option_mangle(
const char *options,
unsigned long mount_flags,

View File

@ -23,8 +23,12 @@ systemctl start testsuite-23-namespaced.service
# Ensure that inaccessible paths aren't bypassed by the runtime setup,
(! systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-fixed /run/inaccessible/testfile-marker-fixed)
echo "MARKER_WRONG" >/run/testsuite-23-marker-wrong
echo "MARKER_RUNTIME" >/run/testsuite-23-marker-runtime
# Mount twice to exercise mount-beneath (on kernel 6.5+, on older kernels it will just overmount)
systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-wrong /tmp/testfile-marker-runtime
test "$(systemctl show -P SubState testsuite-23-namespaced.service)" = "running"
systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-runtime /tmp/testfile-marker-runtime
timeout 10 bash -xec 'while [[ "$(systemctl show -P SubState testsuite-23-namespaced.service)" == running ]]; do sleep .5; done'

View File

@ -363,6 +363,11 @@ ExecStart=/bin/sh -c ' \\
EOF
systemctl start testservice-50d.service
# Mount twice to exercise mount-beneath (on kernel 6.5+, on older kernels it will just overmount)
mkdir -p /tmp/wrong/foo
mksquashfs /tmp/wrong/foo /tmp/wrong.raw
systemctl mount-image --mkdir testservice-50d.service /tmp/wrong.raw /tmp/img
test "$(systemctl show -P SubState testservice-50d.service)" = "running"
systemctl mount-image --mkdir testservice-50d.service "${image}.raw" /tmp/img root:nosuid
while systemctl show -P SubState testservice-50d.service | grep -q running