mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-01-12 09:17:44 +03:00
tree-wide: take BSD lock on loopback devices we dissect/mount/operate on
So here's something we should always keep in mind: systemd-udevd actually does *two* things with BSD file locks on block devices: 1. While it probes a device it takes a LOCK_SH lock. Thus everyone else taking a LOCK_EX lock will temporarily block udev from probing devices, which is good when making changes to it. 2. Whenever a device is closed after write (detected via inotify), udevd will issue BLKRRPART (requesting the kernel to reread the partition table). It does this while holding a LOCK_EX lock on the block device. Thus anyone else taking LOCK_SH or LOCK_EX will temporarily block udevd from issuing that ioctl. And that's quite relevant, since the kernel will temporarily flush out all partitions while re-reading the partition table and then create them anew. Thus it is smart to take LOCK_SH when dissecting a block device to ensure that no BLKRRPART is issued in the background, until we mounted the devices.
This commit is contained in:
parent
345418a9e6
commit
41bc484906
@ -2055,6 +2055,12 @@ int setup_namespace(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_debug_errno(r, "Failed to create loop device for root image: %m");
|
return log_debug_errno(r, "Failed to create loop device for root image: %m");
|
||||||
|
|
||||||
|
/* Make sure udevd won't issue BLKRRPART (which might flush out the loaded partition table)
|
||||||
|
* while we are still trying to mount things */
|
||||||
|
r = loop_device_flock(loop_device, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to lock loopback device with LOCK_SH: %m");
|
||||||
|
|
||||||
r = dissect_image(
|
r = dissect_image(
|
||||||
loop_device->fd,
|
loop_device->fd,
|
||||||
&verity,
|
&verity,
|
||||||
@ -2403,6 +2409,14 @@ int setup_namespace(
|
|||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now release the block device lock, so that udevd is free to call BLKRRPART on the device
|
||||||
|
* if it likes. */
|
||||||
|
r = loop_device_flock(loop_device, LOCK_UN);
|
||||||
|
if (r < 0) {
|
||||||
|
log_debug_errno(r, "Failed to release lock on loopback block device: %m");
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
if (decrypted_image) {
|
if (decrypted_image) {
|
||||||
r = decrypted_image_relinquish(decrypted_image);
|
r = decrypted_image_relinquish(decrypted_image);
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
|
@ -639,6 +639,10 @@ static int action_mount(DissectedImage *m, LoopDevice *d) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
r = loop_device_flock(d, LOCK_UN);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to unlock loopback block device: %m");
|
||||||
|
|
||||||
if (di) {
|
if (di) {
|
||||||
r = decrypted_image_relinquish(di);
|
r = decrypted_image_relinquish(di);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -687,6 +691,10 @@ static int action_copy(DissectedImage *m, LoopDevice *d) {
|
|||||||
|
|
||||||
mounted_dir = TAKE_PTR(created_dir);
|
mounted_dir = TAKE_PTR(created_dir);
|
||||||
|
|
||||||
|
r = loop_device_flock(d, LOCK_UN);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to unlock loopback block device: %m");
|
||||||
|
|
||||||
if (di) {
|
if (di) {
|
||||||
r = decrypted_image_relinquish(di);
|
r = decrypted_image_relinquish(di);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -845,6 +853,12 @@ static int run(int argc, char *argv[]) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image);
|
return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image);
|
||||||
|
|
||||||
|
/* Make sure udevd doesn't issue BLKRRPART underneath us thus making devices disappear in the middle,
|
||||||
|
* that we assume already are there. */
|
||||||
|
r = loop_device_flock(d, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to lock loopback device: %m");
|
||||||
|
|
||||||
r = dissect_image_and_warn(
|
r = dissect_image_and_warn(
|
||||||
d->fd,
|
d->fd,
|
||||||
arg_image,
|
arg_image,
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <sys/file.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "sd-device.h"
|
#include "sd-device.h"
|
||||||
@ -696,6 +697,12 @@ static int enumerate_partitions(dev_t devnum) {
|
|||||||
if (r <= 0)
|
if (r <= 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
/* Let's take a LOCK_SH lock on the block device, in case udevd is already running. If we don't take
|
||||||
|
* the lock, udevd might end up issuing BLKRRPART in the middle, and we don't want that, since that
|
||||||
|
* might remove all partitions while we are operating on them. */
|
||||||
|
if (flock(fd, LOCK_SH) < 0)
|
||||||
|
return log_error_errno(errno, "Failed to lock root block device: %m");
|
||||||
|
|
||||||
r = dissect_image(
|
r = dissect_image(
|
||||||
fd,
|
fd,
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
|
@ -5737,6 +5737,13 @@ static int run(int argc, char *argv[]) {
|
|||||||
goto finish;
|
goto finish;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Take a LOCK_SH lock on the device, so that udevd doesn't issue BLKRRPART in our back */
|
||||||
|
r = loop_device_flock(loop, LOCK_SH);
|
||||||
|
if (r < 0) {
|
||||||
|
log_error_errno(r, "Failed to take lock on loopback block device: %m");
|
||||||
|
goto finish;
|
||||||
|
}
|
||||||
|
|
||||||
r = dissect_image_and_warn(
|
r = dissect_image_and_warn(
|
||||||
loop->fd,
|
loop->fd,
|
||||||
arg_image,
|
arg_image,
|
||||||
|
@ -359,6 +359,10 @@ static int portable_extract_by_path(
|
|||||||
/* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
|
/* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
|
||||||
* there, and extract the metadata we need. The metadata is sent from the child back to us. */
|
* there, and extract the metadata we need. The metadata is sent from the child back to us. */
|
||||||
|
|
||||||
|
r = loop_device_flock(d, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to acquire lock on loopback block device: %m");
|
||||||
|
|
||||||
BLOCK_SIGNALS(SIGCHLD);
|
BLOCK_SIGNALS(SIGCHLD);
|
||||||
|
|
||||||
r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);
|
r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);
|
||||||
|
@ -1196,6 +1196,12 @@ int image_read_metadata(Image *i) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
/* Make sure udevd doesn't issue BLKRRPART in the background which might make our partitions
|
||||||
|
* disappear temporarily. */
|
||||||
|
r = loop_device_flock(d, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
r = dissect_image(
|
r = dissect_image(
|
||||||
d->fd,
|
d->fd,
|
||||||
NULL, NULL,
|
NULL, NULL,
|
||||||
|
@ -2980,6 +2980,11 @@ int mount_image_privately_interactively(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
|
return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
|
||||||
|
|
||||||
|
/* Make sure udevd doesn't issue BLKRRPART behind our backs */
|
||||||
|
r = loop_device_flock(d, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
r = dissect_image_and_warn(d->fd, image, &verity, NULL, d->diskseq, d->uevent_seqnum_not_before, d->timestamp_not_before, flags, &dissected_image);
|
r = dissect_image_and_warn(d->fd, image, &verity, NULL, d->diskseq, d->uevent_seqnum_not_before, d->timestamp_not_before, flags, &dissected_image);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
@ -3006,6 +3011,10 @@ int mount_image_privately_interactively(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
r = loop_device_flock(d, LOCK_UN);
|
||||||
|
if (r < 0)
|
||||||
|
return r;
|
||||||
|
|
||||||
if (decrypted_image) {
|
if (decrypted_image) {
|
||||||
r = decrypted_image_relinquish(decrypted_image);
|
r = decrypted_image_relinquish(decrypted_image);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
@ -3086,6 +3095,10 @@ int verity_dissect_and_mount(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_debug_errno(r, "Failed to create loop device for image: %m");
|
return log_debug_errno(r, "Failed to create loop device for image: %m");
|
||||||
|
|
||||||
|
r = loop_device_flock(loop_device, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to lock loop device: %m");
|
||||||
|
|
||||||
r = dissect_image(
|
r = dissect_image(
|
||||||
loop_device->fd,
|
loop_device->fd,
|
||||||
&verity,
|
&verity,
|
||||||
@ -3133,6 +3146,10 @@ int verity_dissect_and_mount(
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_debug_errno(r, "Failed to mount image: %m");
|
return log_debug_errno(r, "Failed to mount image: %m");
|
||||||
|
|
||||||
|
r = loop_device_flock(loop_device, LOCK_UN);
|
||||||
|
if (r < 0)
|
||||||
|
return log_debug_errno(r, "Failed to unlock loopback device: %m");
|
||||||
|
|
||||||
/* If we got os-release values from the caller, then we need to match them with the image's
|
/* If we got os-release values from the caller, then we need to match them with the image's
|
||||||
* extension-release.d/ content. Return -EINVAL if there's any mismatch.
|
* extension-release.d/ content. Return -EINVAL if there's any mismatch.
|
||||||
* First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
|
* First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
|
||||||
|
@ -533,6 +533,10 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
|
|||||||
if (r < 0)
|
if (r < 0)
|
||||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", img->path);
|
return log_error_errno(r, "Failed to set up loopback device for %s: %m", img->path);
|
||||||
|
|
||||||
|
r = loop_device_flock(d, LOCK_SH);
|
||||||
|
if (r < 0)
|
||||||
|
return log_error_errno(r, "Failed to lock loopback device: %m");
|
||||||
|
|
||||||
r = dissect_image_and_warn(
|
r = dissect_image_and_warn(
|
||||||
d->fd,
|
d->fd,
|
||||||
img->path,
|
img->path,
|
||||||
|
@ -55,6 +55,9 @@ static void* thread_func(void *ptr) {
|
|||||||
|
|
||||||
log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
|
log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
|
||||||
|
|
||||||
|
/* Let's make sure udev doesn't call BLKRRPART in the background, while we try to mount the device. */
|
||||||
|
assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
|
||||||
|
|
||||||
r = dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, DISSECT_IMAGE_READ_ONLY, &dissected);
|
r = dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, DISSECT_IMAGE_READ_ONLY, &dissected);
|
||||||
if (r < 0)
|
if (r < 0)
|
||||||
log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
|
log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
|
||||||
@ -85,6 +88,10 @@ static void* thread_func(void *ptr) {
|
|||||||
log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
|
log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
|
||||||
assert_se(r >= 0);
|
assert_se(r >= 0);
|
||||||
|
|
||||||
|
/* Now the block device is mounted, we don't need no manual lock anymore, the devices are now
|
||||||
|
* pinned by the mounts. */
|
||||||
|
assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
|
||||||
|
|
||||||
log_notice("Unmounting %s", mounted);
|
log_notice("Unmounting %s", mounted);
|
||||||
mounted = umount_and_rmdir_and_free(mounted);
|
mounted = umount_and_rmdir_and_free(mounted);
|
||||||
|
|
||||||
@ -215,6 +222,11 @@ static int run(int argc, char *argv[]) {
|
|||||||
pthread_t threads[arg_n_threads];
|
pthread_t threads[arg_n_threads];
|
||||||
sd_id128_t id;
|
sd_id128_t id;
|
||||||
|
|
||||||
|
/* Take an explicit lock while we format the file systems, in accordance with
|
||||||
|
* https://systemd.io/BLOCK_DEVICE_LOCKING/. We don't want udev to interfere and probe while we write
|
||||||
|
* or even issue BLKRRPART or similar while we are working on this. */
|
||||||
|
assert_se(loop_device_flock(loop, LOCK_EX) >= 0);
|
||||||
|
|
||||||
assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
|
assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
|
||||||
|
|
||||||
assert_se(dissected->partitions[PARTITION_ESP].found);
|
assert_se(dissected->partitions[PARTITION_ESP].found);
|
||||||
@ -243,9 +255,21 @@ static int run(int argc, char *argv[]) {
|
|||||||
|
|
||||||
assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
|
assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
|
||||||
|
|
||||||
|
/* We are particularly correct here, and now downgrade LOCK → LOCK_SH. That's because we are done
|
||||||
|
* with formatting the file systems, so we don't need the exclusive lock anymore. From now on a
|
||||||
|
* shared one is fine. This way udev can now probe the device if it wants, but still won't call
|
||||||
|
* BLKRRPART on it, and that's good, because that would destroy our partition table while we are at
|
||||||
|
* it. */
|
||||||
|
assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
|
||||||
|
|
||||||
/* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
|
/* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
|
||||||
assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
|
assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
|
||||||
|
|
||||||
|
/* Now we mounted everything, the partitions are pinned. Now it's fine to release the lock
|
||||||
|
* fully. This means udev could now issue BLKRRPART again, but that's OK given this will fail because
|
||||||
|
* we now mounted the device. */
|
||||||
|
assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
|
||||||
|
|
||||||
assert_se(umount_recursive(mounted, 0) >= 0);
|
assert_se(umount_recursive(mounted, 0) >= 0);
|
||||||
loop = loop_device_unref(loop);
|
loop = loop_device_unref(loop);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user