mirror of
https://github.com/systemd/systemd.git
synced 2024-12-22 17:35:35 +03:00
mount-util: use mount beneath to replace previous namespace mount
Instead of mounting over, do an atomic swap using mount beneath, if available. This way assets can be mounted again and again (e.g. for updates) without leaking mounts.
This commit is contained in:
parent
f5e6f3117c
commit
7c83d42ef8
@ -1310,11 +1310,16 @@ node /org/freedesktop/systemd1 {
|
|||||||
<function>TryRestartUnit()</function> or <function>ReloadOrTryRestartUnit()</function> for the marked
|
<function>TryRestartUnit()</function> or <function>ReloadOrTryRestartUnit()</function> for the marked
|
||||||
units.</para>
|
units.</para>
|
||||||
|
|
||||||
<para><function>BindMountUnit()</function> can be used to bind mount new files or directories into
|
<para><function>BindMountUnit()</function> can be used to bind mount new files or directories into a
|
||||||
a running service mount namespace.</para>
|
running service mount namespace. If supported by the kernel, any prior mount on the selected target
|
||||||
|
will be replaced by the new mount. If not supported, any prior mount will be over-mounted, but remain
|
||||||
|
pinned and inaccessible.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para><function>MountImageUnit()</function> can be used to mount new images into a running service
|
<para><function>MountImageUnit()</function> can be used to mount new images into a running service
|
||||||
mount namespace.</para>
|
mount namespace. If supported by the kernel, any prior mount on the selected target will be replaced
|
||||||
|
by the new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
|
||||||
|
inaccessible.</para>
|
||||||
|
|
||||||
<para><function>KillUnit()</function> may be used to kill (i.e. send a signal to) all processes of a
|
<para><function>KillUnit()</function> may be used to kill (i.e. send a signal to) all processes of a
|
||||||
unit. It takes the unit <varname>name</varname>, an enum <varname>who</varname> and a UNIX
|
unit. It takes the unit <varname>name</varname>, an enum <varname>who</varname> and a UNIX
|
||||||
|
@ -663,6 +663,10 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
|
|||||||
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
|
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>If supported by the kernel, any prior mount on the selected target will be replaced by the
|
||||||
|
new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
|
||||||
|
inaccessible.</para>
|
||||||
|
|
||||||
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
|
<xi:include href="version-info.xml" xpointer="v248"/></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@ -693,6 +697,10 @@ Jan 12 10:46:45 example.com bluetoothd[8900]: gatt-time-server: Input/output err
|
|||||||
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
|
<option>ExecReload=</option>, <option>ExecStartPre=</option>, etc.) run in distinct namespaces.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>If supported by the kernel, any prior mount on the selected target will be replaced by the
|
||||||
|
new mount. If not supported, any prior mount will be over-mounted, but remain pinned and
|
||||||
|
inaccessible.</para>
|
||||||
|
|
||||||
<para>Example:
|
<para>Example:
|
||||||
<programlisting>systemctl mount-image foo.service /tmp/img.raw /var/lib/image root:ro,nosuid</programlisting>
|
<programlisting>systemctl mount-image foo.service /tmp/img.raw /var/lib/image root:ro,nosuid</programlisting>
|
||||||
<programlisting>systemctl mount-image --mkdir bar.service /tmp/img.raw /var/lib/baz/img</programlisting>
|
<programlisting>systemctl mount-image --mkdir bar.service /tmp/img.raw /var/lib/baz/img</programlisting>
|
||||||
|
@ -539,6 +539,10 @@ static inline int missing_open_tree(
|
|||||||
|
|
||||||
/* ======================================================================= */
|
/* ======================================================================= */
|
||||||
|
|
||||||
|
#ifndef MOVE_MOUNT_BENEATH
|
||||||
|
#define MOVE_MOUNT_BENEATH 0x00000200
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !HAVE_MOVE_MOUNT
|
#if !HAVE_MOVE_MOUNT
|
||||||
|
|
||||||
#ifndef MOVE_MOUNT_F_EMPTY_PATH
|
#ifndef MOVE_MOUNT_F_EMPTY_PATH
|
||||||
|
@ -2007,8 +2007,12 @@ static int mount_partition(
|
|||||||
if (m->fsmount_fd >= 0) {
|
if (m->fsmount_fd >= 0) {
|
||||||
/* Case #1: Attach existing fsmount fd to the file system */
|
/* Case #1: Attach existing fsmount fd to the file system */
|
||||||
|
|
||||||
if (move_mount(m->fsmount_fd, "", -EBADF, p, MOVE_MOUNT_F_EMPTY_PATH) < 0)
|
r = mount_exchange_graceful(
|
||||||
return -errno;
|
m->fsmount_fd,
|
||||||
|
p,
|
||||||
|
FLAGS_SET(flags, DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE));
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to mount image on '%s': %m", p);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
assert(node);
|
assert(node);
|
||||||
|
@ -56,36 +56,37 @@ struct DissectedPartition {
|
|||||||
})
|
})
|
||||||
|
|
||||||
typedef enum DissectImageFlags {
|
typedef enum DissectImageFlags {
|
||||||
DISSECT_IMAGE_DEVICE_READ_ONLY = 1 << 0, /* Make device read-only */
|
DISSECT_IMAGE_DEVICE_READ_ONLY = 1 << 0, /* Make device read-only */
|
||||||
DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
|
DISSECT_IMAGE_DISCARD_ON_LOOP = 1 << 1, /* Turn on "discard" if on a loop device and file system supports it */
|
||||||
DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
|
DISSECT_IMAGE_DISCARD = 1 << 2, /* Turn on "discard" if file system supports it, on all block devices */
|
||||||
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
|
DISSECT_IMAGE_DISCARD_ON_CRYPTO = 1 << 3, /* Turn on "discard" also on crypto devices */
|
||||||
DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
|
DISSECT_IMAGE_DISCARD_ANY = DISSECT_IMAGE_DISCARD_ON_LOOP |
|
||||||
DISSECT_IMAGE_DISCARD |
|
DISSECT_IMAGE_DISCARD |
|
||||||
DISSECT_IMAGE_DISCARD_ON_CRYPTO,
|
DISSECT_IMAGE_DISCARD_ON_CRYPTO,
|
||||||
DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
|
DISSECT_IMAGE_GPT_ONLY = 1 << 4, /* Only recognize images with GPT partition tables */
|
||||||
DISSECT_IMAGE_GENERIC_ROOT = 1 << 5, /* If no partition table or only single generic partition, assume it's the root fs */
|
DISSECT_IMAGE_GENERIC_ROOT = 1 << 5, /* If no partition table or only single generic partition, assume it's the root fs */
|
||||||
DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root and /usr partitions */
|
DISSECT_IMAGE_MOUNT_ROOT_ONLY = 1 << 6, /* Mount only the root and /usr partitions */
|
||||||
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */
|
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */
|
||||||
DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
|
DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
|
||||||
DISSECT_IMAGE_VALIDATE_OS_EXT = 1 << 9, /* Refuse mounting images that aren't identifiable as OS extension images */
|
DISSECT_IMAGE_VALIDATE_OS_EXT = 1 << 9, /* Refuse mounting images that aren't identifiable as OS extension images */
|
||||||
DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
|
DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
|
||||||
DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
|
DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
|
||||||
DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */
|
DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */
|
||||||
DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */
|
DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */
|
||||||
DISSECT_IMAGE_MKDIR = 1 << 14, /* Make top-level directory to mount right before mounting, if missing */
|
DISSECT_IMAGE_MKDIR = 1 << 14, /* Make top-level directory to mount right before mounting, if missing */
|
||||||
DISSECT_IMAGE_USR_NO_ROOT = 1 << 15, /* If no root fs is in the image, but /usr is, then allow this (so that we can mount the rootfs as tmpfs or so */
|
DISSECT_IMAGE_USR_NO_ROOT = 1 << 15, /* If no root fs is in the image, but /usr is, then allow this (so that we can mount the rootfs as tmpfs or so */
|
||||||
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 16, /* Don't accept disks without root partition (or at least /usr partition if DISSECT_IMAGE_USR_NO_ROOT is set) */
|
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 16, /* Don't accept disks without root partition (or at least /usr partition if DISSECT_IMAGE_USR_NO_ROOT is set) */
|
||||||
DISSECT_IMAGE_MOUNT_READ_ONLY = 1 << 17, /* Make mounts read-only */
|
DISSECT_IMAGE_MOUNT_READ_ONLY = 1 << 17, /* Make mounts read-only */
|
||||||
DISSECT_IMAGE_READ_ONLY = DISSECT_IMAGE_DEVICE_READ_ONLY |
|
DISSECT_IMAGE_READ_ONLY = DISSECT_IMAGE_DEVICE_READ_ONLY |
|
||||||
DISSECT_IMAGE_MOUNT_READ_ONLY,
|
DISSECT_IMAGE_MOUNT_READ_ONLY,
|
||||||
DISSECT_IMAGE_GROWFS = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
|
DISSECT_IMAGE_GROWFS = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
|
||||||
DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
|
DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
|
||||||
DISSECT_IMAGE_ADD_PARTITION_DEVICES = 1 << 20, /* Create partition devices via BLKPG_ADD_PARTITION */
|
DISSECT_IMAGE_ADD_PARTITION_DEVICES = 1 << 20, /* Create partition devices via BLKPG_ADD_PARTITION */
|
||||||
DISSECT_IMAGE_PIN_PARTITION_DEVICES = 1 << 21, /* Open dissected partitions and decrypted partitions and pin them by fd */
|
DISSECT_IMAGE_PIN_PARTITION_DEVICES = 1 << 21, /* Open dissected partitions and decrypted partitions and pin them by fd */
|
||||||
DISSECT_IMAGE_RELAX_EXTENSION_CHECK = 1 << 22, /* Don't insist that the extension-release file name matches the image name */
|
DISSECT_IMAGE_RELAX_EXTENSION_CHECK = 1 << 22, /* Don't insist that the extension-release file name matches the image name */
|
||||||
DISSECT_IMAGE_DISKSEQ_DEVNODE = 1 << 23, /* Prefer /dev/disk/by-diskseq/… device nodes */
|
DISSECT_IMAGE_DISKSEQ_DEVNODE = 1 << 23, /* Prefer /dev/disk/by-diskseq/… device nodes */
|
||||||
DISSECT_IMAGE_ALLOW_EMPTY = 1 << 24, /* Allow that no usable partitions is present */
|
DISSECT_IMAGE_ALLOW_EMPTY = 1 << 24, /* Allow that no usable partitions is present */
|
||||||
|
DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE = 1 << 25, /* Try to mount the image beneath the specified mountpoint, rather than on top of it, and then umount the top */
|
||||||
} DissectImageFlags;
|
} DissectImageFlags;
|
||||||
|
|
||||||
struct DissectedImage {
|
struct DissectedImage {
|
||||||
|
@ -730,6 +730,45 @@ int umount_verbose(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath) {
|
||||||
|
int r;
|
||||||
|
|
||||||
|
assert(fsmount_fd >= 0);
|
||||||
|
assert(dest);
|
||||||
|
|
||||||
|
/* First, try to mount beneath an existing mount point, and if that works, umount the old mount,
|
||||||
|
* which is now at the top. This will ensure we can atomically replace a mount. Note that this works
|
||||||
|
* also in the case where there are submounts down the tree. Mount propagation is allowed but
|
||||||
|
* restricted to layouts that don't end up propagating the new mount on top of the mount stack. If
|
||||||
|
* this is not supported (minimum kernel v6.5), or if there is no mount on the mountpoint, we get
|
||||||
|
* -EINVAL and then we fall back to normal mounting. */
|
||||||
|
|
||||||
|
r = RET_NERRNO(move_mount(
|
||||||
|
fsmount_fd,
|
||||||
|
/* from_path= */ "",
|
||||||
|
/* to_fd= */ -EBADF,
|
||||||
|
dest,
|
||||||
|
MOVE_MOUNT_F_EMPTY_PATH | (mount_beneath ? MOVE_MOUNT_BENEATH : 0)));
|
||||||
|
if (mount_beneath) {
|
||||||
|
if (r == -EINVAL) { /* Fallback if mount_beneath is not supported */
|
||||||
|
log_debug_errno(r,
|
||||||
|
"Failed to mount beneath '%s', falling back to overmount",
|
||||||
|
dest);
|
||||||
|
return RET_NERRNO(move_mount(
|
||||||
|
fsmount_fd,
|
||||||
|
/* from_path= */ "",
|
||||||
|
/* to_fd= */ -EBADF,
|
||||||
|
dest,
|
||||||
|
MOVE_MOUNT_F_EMPTY_PATH));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r >= 0) /* If it is, now remove the old mount */
|
||||||
|
return umount_verbose(LOG_DEBUG, dest, UMOUNT_NOFOLLOW|MNT_DETACH);
|
||||||
|
}
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
int mount_option_mangle(
|
int mount_option_mangle(
|
||||||
const char *options,
|
const char *options,
|
||||||
unsigned long mount_flags,
|
unsigned long mount_flags,
|
||||||
@ -1155,7 +1194,7 @@ static int mount_in_namespace(
|
|||||||
(void) mkdir_parents(dest, 0755);
|
(void) mkdir_parents(dest, 0755);
|
||||||
|
|
||||||
if (img) {
|
if (img) {
|
||||||
DissectImageFlags f = 0;
|
DissectImageFlags f = DISSECT_IMAGE_TRY_ATOMIC_MOUNT_EXCHANGE;
|
||||||
|
|
||||||
if (make_file_or_directory)
|
if (make_file_or_directory)
|
||||||
f |= DISSECT_IMAGE_MKDIR;
|
f |= DISSECT_IMAGE_MKDIR;
|
||||||
@ -1174,11 +1213,7 @@ static int mount_in_namespace(
|
|||||||
if (make_file_or_directory)
|
if (make_file_or_directory)
|
||||||
(void) make_mount_point_inode_from_stat(&st, dest, 0700);
|
(void) make_mount_point_inode_from_stat(&st, dest, 0700);
|
||||||
|
|
||||||
r = RET_NERRNO(move_mount(new_mount_fd,
|
r = mount_exchange_graceful(new_mount_fd, dest, /* mount_beneath= */ true);
|
||||||
"",
|
|
||||||
-EBADF,
|
|
||||||
dest,
|
|
||||||
MOVE_MOUNT_F_EMPTY_PATH));
|
|
||||||
}
|
}
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
(void) write(errno_pipe_fd[1], &r, sizeof(r));
|
(void) write(errno_pipe_fd[1], &r, sizeof(r));
|
||||||
|
@ -68,6 +68,8 @@ int umount_verbose(
|
|||||||
const char *where,
|
const char *where,
|
||||||
int flags);
|
int flags);
|
||||||
|
|
||||||
|
int mount_exchange_graceful(int fsmount_fd, const char *dest, bool mount_beneath);
|
||||||
|
|
||||||
int mount_option_mangle(
|
int mount_option_mangle(
|
||||||
const char *options,
|
const char *options,
|
||||||
unsigned long mount_flags,
|
unsigned long mount_flags,
|
||||||
|
@ -23,8 +23,12 @@ systemctl start testsuite-23-namespaced.service
|
|||||||
# Ensure that inaccessible paths aren't bypassed by the runtime setup,
|
# Ensure that inaccessible paths aren't bypassed by the runtime setup,
|
||||||
(! systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-fixed /run/inaccessible/testfile-marker-fixed)
|
(! systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-fixed /run/inaccessible/testfile-marker-fixed)
|
||||||
|
|
||||||
|
echo "MARKER_WRONG" >/run/testsuite-23-marker-wrong
|
||||||
echo "MARKER_RUNTIME" >/run/testsuite-23-marker-runtime
|
echo "MARKER_RUNTIME" >/run/testsuite-23-marker-runtime
|
||||||
|
|
||||||
|
# Mount twice to exercise mount-beneath (on kernel 6.5+, on older kernels it will just overmount)
|
||||||
|
systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-wrong /tmp/testfile-marker-runtime
|
||||||
|
test "$(systemctl show -P SubState testsuite-23-namespaced.service)" = "running"
|
||||||
systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-runtime /tmp/testfile-marker-runtime
|
systemctl bind --mkdir testsuite-23-namespaced.service /run/testsuite-23-marker-runtime /tmp/testfile-marker-runtime
|
||||||
|
|
||||||
timeout 10 bash -xec 'while [[ "$(systemctl show -P SubState testsuite-23-namespaced.service)" == running ]]; do sleep .5; done'
|
timeout 10 bash -xec 'while [[ "$(systemctl show -P SubState testsuite-23-namespaced.service)" == running ]]; do sleep .5; done'
|
||||||
|
@ -363,6 +363,11 @@ ExecStart=/bin/sh -c ' \\
|
|||||||
EOF
|
EOF
|
||||||
systemctl start testservice-50d.service
|
systemctl start testservice-50d.service
|
||||||
|
|
||||||
|
# Mount twice to exercise mount-beneath (on kernel 6.5+, on older kernels it will just overmount)
|
||||||
|
mkdir -p /tmp/wrong/foo
|
||||||
|
mksquashfs /tmp/wrong/foo /tmp/wrong.raw
|
||||||
|
systemctl mount-image --mkdir testservice-50d.service /tmp/wrong.raw /tmp/img
|
||||||
|
test "$(systemctl show -P SubState testservice-50d.service)" = "running"
|
||||||
systemctl mount-image --mkdir testservice-50d.service "${image}.raw" /tmp/img root:nosuid
|
systemctl mount-image --mkdir testservice-50d.service "${image}.raw" /tmp/img root:nosuid
|
||||||
|
|
||||||
while systemctl show -P SubState testservice-50d.service | grep -q running
|
while systemctl show -P SubState testservice-50d.service | grep -q running
|
||||||
|
Loading…
Reference in New Issue
Block a user