mirror of
https://github.com/systemd/systemd-stable.git
synced 2024-12-23 17:34:00 +03:00
Merge pull request #22992 from poettering/loop-dissect-tweaks
loop-util/image dissect fixes
This commit is contained in:
commit
29d902f03a
6
TODO
6
TODO
@ -169,12 +169,6 @@ Features:
|
||||
|
||||
* bootctl: show whether UEFI audit mode is available
|
||||
|
||||
* dissect: rework how we access partitions: instead of letting the kernel probe
|
||||
partition tables asynchronously, just pass the stuff we parsed in userspace
|
||||
to the kernel via BLKPG_ADD_PARTITION. Benefit: we don't have to wait for
|
||||
kernel/netlink/udev, but can run this synchronously without chance of losing
|
||||
events or similar.
|
||||
|
||||
* sd-event: optionally, if per-event source rate limit is hit, downgrade
|
||||
priority, but leave enabled, and once ratelimit window is over, upgrade
|
||||
priority again. That way we can combat event source starvation without
|
||||
|
@ -2055,6 +2055,12 @@ int setup_namespace(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to create loop device for root image: %m");
|
||||
|
||||
/* Make sure udevd won't issue BLKRRPART (which might flush out the loaded partition table)
|
||||
* while we are still trying to mount things */
|
||||
r = loop_device_flock(loop_device, LOCK_SH);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to lock loopback device with LOCK_SH: %m");
|
||||
|
||||
r = dissect_image(
|
||||
loop_device->fd,
|
||||
&verity,
|
||||
@ -2403,6 +2409,14 @@ int setup_namespace(
|
||||
goto finish;
|
||||
}
|
||||
|
||||
/* Now release the block device lock, so that udevd is free to call BLKRRPART on the device
|
||||
* if it likes. */
|
||||
r = loop_device_flock(loop_device, LOCK_UN);
|
||||
if (r < 0) {
|
||||
log_debug_errno(r, "Failed to release lock on loopback block device: %m");
|
||||
goto finish;
|
||||
}
|
||||
|
||||
if (decrypted_image) {
|
||||
r = decrypted_image_relinquish(decrypted_image);
|
||||
if (r < 0) {
|
||||
|
@ -639,6 +639,10 @@ static int action_mount(DissectedImage *m, LoopDevice *d) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = loop_device_flock(d, LOCK_UN);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to unlock loopback block device: %m");
|
||||
|
||||
if (di) {
|
||||
r = decrypted_image_relinquish(di);
|
||||
if (r < 0)
|
||||
@ -687,6 +691,10 @@ static int action_copy(DissectedImage *m, LoopDevice *d) {
|
||||
|
||||
mounted_dir = TAKE_PTR(created_dir);
|
||||
|
||||
r = loop_device_flock(d, LOCK_UN);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to unlock loopback block device: %m");
|
||||
|
||||
if (di) {
|
||||
r = decrypted_image_relinquish(di);
|
||||
if (r < 0)
|
||||
@ -845,6 +853,12 @@ static int run(int argc, char *argv[]) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image);
|
||||
|
||||
/* Make sure udevd doesn't issue BLKRRPART underneath us thus making devices disappear in the middle,
|
||||
* that we assume already are there. */
|
||||
r = loop_device_flock(d, LOCK_SH);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to lock loopback device: %m");
|
||||
|
||||
r = dissect_image_and_warn(
|
||||
d->fd,
|
||||
arg_image,
|
||||
|
@ -1,6 +1,7 @@
|
||||
/* SPDX-License-Identifier: LGPL-2.1-or-later */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <sys/file.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sd-device.h"
|
||||
@ -696,6 +697,12 @@ static int enumerate_partitions(dev_t devnum) {
|
||||
if (r <= 0)
|
||||
return r;
|
||||
|
||||
/* Let's take a LOCK_SH lock on the block device, in case udevd is already running. If we don't take
|
||||
* the lock, udevd might end up issuing BLKRRPART in the middle, and we don't want that, since that
|
||||
* might remove all partitions while we are operating on them. */
|
||||
if (flock(fd, LOCK_SH) < 0)
|
||||
return log_error_errno(errno, "Failed to lock root block device: %m");
|
||||
|
||||
r = dissect_image(
|
||||
fd,
|
||||
NULL, NULL,
|
||||
@ -703,7 +710,6 @@ static int enumerate_partitions(dev_t devnum) {
|
||||
UINT64_MAX,
|
||||
USEC_INFINITY,
|
||||
DISSECT_IMAGE_GPT_ONLY|
|
||||
DISSECT_IMAGE_NO_UDEV|
|
||||
DISSECT_IMAGE_USR_NO_ROOT,
|
||||
&m);
|
||||
if (r == -ENOPKG) {
|
||||
|
@ -5737,6 +5737,13 @@ static int run(int argc, char *argv[]) {
|
||||
goto finish;
|
||||
}
|
||||
|
||||
/* Take a LOCK_SH lock on the device, so that udevd doesn't issue BLKRRPART behind our back */
|
||||
r = loop_device_flock(loop, LOCK_SH);
|
||||
if (r < 0) {
|
||||
log_error_errno(r, "Failed to take lock on loopback block device: %m");
|
||||
goto finish;
|
||||
}
|
||||
|
||||
r = dissect_image_and_warn(
|
||||
loop->fd,
|
||||
arg_image,
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "sd-device.h"
|
||||
#include "sd-id128.h"
|
||||
|
||||
#include "alloc-util.h"
|
||||
@ -3643,14 +3644,13 @@ static int resolve_copy_blocks_auto_candidate(
|
||||
sd_id128_t *ret_uuid) {
|
||||
|
||||
_cleanup_(blkid_free_probep) blkid_probe b = NULL;
|
||||
_cleanup_free_ char *p = NULL;
|
||||
_cleanup_(sd_device_unrefp) sd_device *dev = NULL;
|
||||
_cleanup_close_ int fd = -1;
|
||||
const char *pttype, *t;
|
||||
const char *pttype, *t, *p;
|
||||
sd_id128_t pt_parsed, u;
|
||||
blkid_partition pp;
|
||||
dev_t whole_devno;
|
||||
blkid_partlist pl;
|
||||
struct stat st;
|
||||
int r;
|
||||
|
||||
/* Checks if the specified partition has the specified GPT type UUID, and is located on the specified
|
||||
@ -3673,21 +3673,19 @@ static int resolve_copy_blocks_auto_candidate(
|
||||
major(partition_devno), minor(partition_devno),
|
||||
major(restrict_devno), minor(restrict_devno));
|
||||
|
||||
r = device_path_make_major_minor(S_IFBLK, whole_devno, &p);
|
||||
r = sd_device_new_from_devnum(&dev, 'b', whole_devno);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to convert block device to device node path: %m");
|
||||
return log_error_errno(r, "Failed to create sd-device for block device %u:%u: %m",
|
||||
major(whole_devno), minor(whole_devno));
|
||||
|
||||
fd = open(p, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
|
||||
r = sd_device_get_devname(dev, &p);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get name of block device %u:%u: %m",
|
||||
major(whole_devno), minor(whole_devno));
|
||||
|
||||
fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
|
||||
if (fd < 0)
|
||||
return log_error_errno(r, "Failed to open '%s': %m", p);
|
||||
|
||||
if (fstat(fd, &st) < 0)
|
||||
return log_error_errno(r, "Failed to stat '%s': %m", p);
|
||||
|
||||
if (!S_ISBLK(st.st_mode) || st.st_rdev != whole_devno)
|
||||
return log_error_errno(
|
||||
SYNTHETIC_ERRNO(EPERM),
|
||||
"Opened and determined block device don't match, refusing.");
|
||||
return log_error_errno(fd, "Failed to open block device %s: %m", p);
|
||||
|
||||
b = blkid_new_probe();
|
||||
if (!b)
|
||||
@ -3805,7 +3803,7 @@ static int resolve_copy_blocks_auto(
|
||||
sd_id128_t type_uuid,
|
||||
const char *root,
|
||||
dev_t restrict_devno,
|
||||
char **ret_path,
|
||||
dev_t *ret_devno,
|
||||
sd_id128_t *ret_uuid) {
|
||||
|
||||
const char *try1 = NULL, *try2 = NULL;
|
||||
@ -3815,8 +3813,6 @@ static int resolve_copy_blocks_auto(
|
||||
dev_t devno, found = 0;
|
||||
int r;
|
||||
|
||||
assert(ret_path);
|
||||
|
||||
/* Enforce some security restrictions: CopyBlocks=auto should not be an avenue to get outside of the
|
||||
* --root=/--image= confinement. Specifically, refuse CopyBlocks= in combination with --root= at all,
|
||||
* and restrict block device references in the --image= case to loopback block device we set up.
|
||||
@ -3926,9 +3922,8 @@ static int resolve_copy_blocks_auto(
|
||||
return log_error_errno(SYNTHETIC_ERRNO(ENXIO),
|
||||
"Unable to automatically discover suitable partition to copy blocks from.");
|
||||
|
||||
r = device_path_make_major_minor(S_IFBLK, found, ret_path);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to convert dev_t to device node path: %m");
|
||||
if (ret_devno)
|
||||
*ret_devno = found;
|
||||
|
||||
if (ret_uuid)
|
||||
*ret_uuid = found_uuid;
|
||||
@ -3972,32 +3967,43 @@ static int context_open_copy_block_paths(
|
||||
"Copying from block device node is not permitted in --image=/--root= mode, refusing.");
|
||||
|
||||
} else if (p->copy_blocks_auto) {
|
||||
_cleanup_(sd_device_unrefp) sd_device *dev = NULL;
|
||||
const char *devname;
|
||||
dev_t devno;
|
||||
|
||||
r = resolve_copy_blocks_auto(p->type_uuid, root, restrict_devno, &opened, &uuid);
|
||||
r = resolve_copy_blocks_auto(p->type_uuid, root, restrict_devno, &devno, &uuid);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
source_fd = open(opened, O_RDONLY|O_CLOEXEC|O_NOCTTY);
|
||||
r = sd_device_new_from_devnum(&dev, 'b', devno);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to create sd-device object for device %u:%u: %m", major(devno), minor(devno));
|
||||
|
||||
r = sd_device_get_devname(dev, &devname);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get device name of %u:%u: %m", major(devno), minor(devno));
|
||||
|
||||
opened = strdup(devname);
|
||||
if (!opened)
|
||||
return log_oom();
|
||||
|
||||
source_fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
|
||||
if (source_fd < 0)
|
||||
return log_error_errno(errno, "Failed to open automatically determined source block copy device '%s': %m", opened);
|
||||
return log_error_errno(source_fd, "Failed to open automatically determined source block copy device '%s': %m", opened);
|
||||
|
||||
if (fstat(source_fd, &st) < 0)
|
||||
return log_error_errno(errno, "Failed to stat block copy file '%s': %m", opened);
|
||||
|
||||
/* If we found it automatically, it must be a block device, let's enforce that */
|
||||
if (!S_ISBLK(st.st_mode))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(EBADF),
|
||||
"Automatically detected source block copy device '%s' is not a block device, refusing: %m", opened);
|
||||
} else
|
||||
} else
|
||||
continue;
|
||||
|
||||
if (S_ISDIR(st.st_mode)) {
|
||||
_cleanup_free_ char *bdev = NULL;
|
||||
_cleanup_(sd_device_unrefp) sd_device *dev = NULL;
|
||||
const char *bdev;
|
||||
|
||||
/* If the file is a directory, automatically find the backing block device */
|
||||
|
||||
if (major(st.st_dev) != 0)
|
||||
r = device_path_make_major_minor(S_IFBLK, st.st_dev, &bdev);
|
||||
r = sd_device_new_from_devnum(&dev, 'b', st.st_dev);
|
||||
else {
|
||||
dev_t devt;
|
||||
|
||||
@ -4009,22 +4015,23 @@ static int context_open_copy_block_paths(
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Unable to determine backing block device of '%s': %m", opened);
|
||||
|
||||
r = device_path_make_major_minor(S_IFBLK, devt, &bdev);
|
||||
r = sd_device_new_from_devnum(&dev, 'b', devt);
|
||||
}
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to determine block device path for block device backing '%s': %m", opened);
|
||||
return log_error_errno(r, "Failed to create sd-device object for block device backing '%s': %m", opened);
|
||||
|
||||
r = sd_device_get_devpath(dev, &bdev);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get device name for block device backing '%s': %m", opened);
|
||||
|
||||
safe_close(source_fd);
|
||||
|
||||
source_fd = open(bdev, O_RDONLY|O_CLOEXEC|O_NOCTTY);
|
||||
source_fd = sd_device_open(dev, O_RDONLY|O_CLOEXEC|O_NONBLOCK);
|
||||
if (source_fd < 0)
|
||||
return log_error_errno(errno, "Failed to open block device '%s': %m", bdev);
|
||||
return log_error_errno(source_fd, "Failed to open block device '%s': %m", bdev);
|
||||
|
||||
if (fstat(source_fd, &st) < 0)
|
||||
return log_error_errno(errno, "Failed to stat block device '%s': %m", bdev);
|
||||
|
||||
if (!S_ISBLK(st.st_mode))
|
||||
return log_error_errno(SYNTHETIC_ERRNO(ENOTBLK), "Block device '%s' is not actually a block device, refusing.", bdev);
|
||||
}
|
||||
|
||||
if (S_ISREG(st.st_mode))
|
||||
|
@ -359,6 +359,10 @@ static int portable_extract_by_path(
|
||||
/* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
|
||||
* there, and extract the metadata we need. The metadata is sent from the child back to us. */
|
||||
|
||||
r = loop_device_flock(d, LOCK_SH);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to acquire lock on loopback block device: %m");
|
||||
|
||||
BLOCK_SIGNALS(SIGCHLD);
|
||||
|
||||
r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);
|
||||
|
@ -1196,6 +1196,12 @@ int image_read_metadata(Image *i) {
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Make sure udevd doesn't issue BLKRRPART in the background which might make our partitions
|
||||
* disappear temporarily. */
|
||||
r = loop_device_flock(d, LOCK_SH);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = dissect_image(
|
||||
d->fd,
|
||||
NULL, NULL,
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/dm-ioctl.h>
|
||||
#include <linux/loop.h>
|
||||
#include <sys/mount.h>
|
||||
@ -125,389 +126,6 @@ not_found:
|
||||
}
|
||||
|
||||
#if HAVE_BLKID
|
||||
/* Allocates a device enumerator that matches devices of the "block" subsystem which have 'd' as parent
 * and carry a "partition" sysfs attribute. On success the new enumerator is stored in *ret and 0 is
 * returned; on failure a negative errno-style error is returned and *ret is left untouched. */
static int enumerator_for_parent(sd_device *d, sd_device_enumerator **ret) {
        _cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *enumerator = NULL;
        int rc;

        assert(d);
        assert(ret);

        rc = sd_device_enumerator_new(&enumerator);
        if (rc < 0)
                return rc;

        /* Only block devices … */
        rc = sd_device_enumerator_add_match_subsystem(enumerator, "block", true);
        if (rc < 0)
                return rc;

        /* … below the specified parent … */
        rc = sd_device_enumerator_add_match_parent(enumerator, d);
        if (rc < 0)
                return rc;

        /* … that have a "partition" attribute (any value) */
        rc = sd_device_enumerator_add_match_sysattr(enumerator, "partition", NULL, true);
        if (rc < 0)
                return rc;

        *ret = TAKE_PTR(enumerator);
        return 0;
}
|
||||
|
||||
static int device_is_partition(
|
||||
sd_device *d,
|
||||
sd_device *expected_parent,
|
||||
blkid_partition pp) {
|
||||
|
||||
const char *v, *parent_syspath, *expected_parent_syspath;
|
||||
blkid_loff_t bsize, bstart;
|
||||
uint64_t size, start;
|
||||
int partno, bpartno, r;
|
||||
sd_device *parent;
|
||||
|
||||
assert(d);
|
||||
assert(expected_parent);
|
||||
assert(pp);
|
||||
|
||||
r = sd_device_get_subsystem(d, &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (!streq(v, "block"))
|
||||
return false;
|
||||
|
||||
if (sd_device_get_devtype(d, &v) < 0 || !streq(v, "partition"))
|
||||
return false;
|
||||
|
||||
r = sd_device_get_parent(d, &parent);
|
||||
if (r < 0)
|
||||
return false; /* Doesn't have a parent? No relevant to us */
|
||||
|
||||
r = sd_device_get_syspath(parent, &parent_syspath); /* Check parent of device of this action */
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_get_syspath(expected_parent, &expected_parent_syspath); /* Check parent of device we are looking for */
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!path_equal(parent_syspath, expected_parent_syspath))
|
||||
return false; /* Has a different parent than what we need, not interesting to us */
|
||||
|
||||
/* On kernel uevents we may find the partition number in the PARTN= field. Let's use that preferably,
|
||||
* since it's cheaper and more importantly: the sysfs attribute "partition" appears to become
|
||||
* available late, hence let's use the property instead, which is available at the moment we see the
|
||||
* uevent. */
|
||||
r = sd_device_get_property_value(d, "PARTN", &v);
|
||||
if (r == -ENOENT)
|
||||
r = sd_device_get_sysattr_value(d, "partition", &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = safe_atoi(v, &partno);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
errno = 0;
|
||||
bpartno = blkid_partition_get_partno(pp);
|
||||
if (bpartno < 0)
|
||||
return errno_or_else(EIO);
|
||||
|
||||
if (partno != bpartno)
|
||||
return false;
|
||||
|
||||
r = sd_device_get_sysattr_value(d, "start", &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = safe_atou64(v, &start);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
errno = 0;
|
||||
bstart = blkid_partition_get_start(pp);
|
||||
if (bstart < 0)
|
||||
return errno_or_else(EIO);
|
||||
|
||||
if (start != (uint64_t) bstart)
|
||||
return false;
|
||||
|
||||
r = sd_device_get_sysattr_value(d, "size", &v);
|
||||
if (r < 0)
|
||||
return r;
|
||||
r = safe_atou64(v, &size);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
errno = 0;
|
||||
bsize = blkid_partition_get_size(pp);
|
||||
if (bsize < 0)
|
||||
return errno_or_else(EIO);
|
||||
|
||||
if (size != (uint64_t) bsize)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int find_partition(
|
||||
sd_device *parent,
|
||||
blkid_partition pp,
|
||||
usec_t timestamp_not_before,
|
||||
DissectImageFlags flags,
|
||||
sd_device **ret) {
|
||||
|
||||
_cleanup_(sd_device_enumerator_unrefp) sd_device_enumerator *e = NULL;
|
||||
sd_device *q;
|
||||
int r;
|
||||
|
||||
assert(parent);
|
||||
assert(pp);
|
||||
assert(ret);
|
||||
|
||||
r = enumerator_for_parent(parent, &e);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
FOREACH_DEVICE(e, q) {
|
||||
uint64_t usec;
|
||||
|
||||
if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
|
||||
r = sd_device_get_usec_initialized(q, &usec);
|
||||
if (r == -EBUSY) /* Not initialized yet */
|
||||
continue;
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (timestamp_not_before != USEC_INFINITY &&
|
||||
usec < timestamp_not_before) /* udev database entry older than our attachment? Then it's not ours */
|
||||
continue;
|
||||
}
|
||||
|
||||
r = device_is_partition(q, parent, pp);
|
||||
if (r < 0)
|
||||
return r;
|
||||
if (r > 0) {
|
||||
*ret = sd_device_ref(q);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
struct wait_data {
|
||||
sd_device *parent_device;
|
||||
blkid_partition blkidp;
|
||||
sd_device *found;
|
||||
uint64_t diskseq;
|
||||
uint64_t uevent_seqnum_not_before;
|
||||
usec_t timestamp_not_before;
|
||||
DissectImageFlags flags;
|
||||
};
|
||||
|
||||
static inline void wait_data_done(struct wait_data *d) {
|
||||
sd_device_unref(d->found);
|
||||
}
|
||||
|
||||
/* Device monitor callback used while waiting for a partition device. Events that are removals, that are
 * too old, or that are about a different partition are ignored (return 0, keep waiting). If the event
 * is about the partition we are waiting for, a reference to it is stored in the wait_data and the event
 * loop is asked to exit with 0. On errors, or when a diskseq newer than the expected one is seen
 * (-EBUSY), the event loop is asked to exit with that error. */
static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device, void *userdata) {
        struct wait_data *w = userdata;
        int r;

        assert(w);

        if (device_for_action(device, SD_DEVICE_REMOVE))
                return 0;

        if (w->diskseq != 0) {
                uint64_t event_diskseq;

                /* If w->diskseq is non-zero, then the event must carry a disk sequence number */
                r = sd_device_get_diskseq(device, &event_diskseq);
                if (r < 0) {
                        log_debug_errno(r, "Dropping event because it has no diskseq, but waiting for %" PRIu64, w->diskseq);
                        return 0;
                }

                if (event_diskseq < w->diskseq) {
                        log_debug("Dropping event because diskseq too old (%" PRIu64 " < %" PRIu64 ")",
                                  event_diskseq, w->diskseq);
                        return 0;
                }

                /* Newer than what we were expecting, so we missed it, stop waiting */
                if (event_diskseq > w->diskseq)
                        return sd_event_exit(sd_device_monitor_get_event(monitor), -EBUSY);

        } else if (w->uevent_seqnum_not_before != UINT64_MAX) {
                uint64_t event_seqnum;

                r = sd_device_get_seqnum(device, &event_seqnum);
                if (r < 0)
                        return sd_event_exit(sd_device_monitor_get_event(monitor), r);

                if (event_seqnum <= w->uevent_seqnum_not_before) { /* From an older use of this loop device */
                        log_debug("Dropping event because seqnum too old (%" PRIu64 " <= %" PRIu64 ")",
                                  event_seqnum, w->uevent_seqnum_not_before);
                        return 0;
                }
        }

        r = device_is_partition(device, w->parent_device, w->blkidp);
        if (r < 0)
                return sd_event_exit(sd_device_monitor_get_event(monitor), r);
        if (r == 0) /* Not the one we need */
                return 0;

        /* This is the partition we have been waiting for */
        assert(!w->found);
        w->found = sd_device_ref(device);

        return sd_event_exit(sd_device_monitor_get_event(monitor), 0);
}
|
||||
|
||||
/* Timer callback that fires when the overall deadline for the partition to appear has passed. Before
 * giving up, the partition is searched for one final time via enumeration: a uevent may have been lost,
 * or some properties may not have been ready yet when the uevent was received. Returns -ETIMEDOUT if
 * the partition still cannot be found; otherwise asks the event loop to exit successfully. */
static int timeout_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        struct wait_data *w = userdata;
        int r;

        assert(w);

        r = find_partition(w->parent_device, w->blkidp, w->timestamp_not_before, w->flags, &w->found);
        if (r == -ENXIO)
                return log_debug_errno(SYNTHETIC_ERRNO(ETIMEDOUT),
                                       "Partition still not appeared after timeout reached.");
        if (r < 0)
                return log_debug_errno(r, "Failed to find partition: %m");

        log_debug("Partition appeared after timeout reached.");
        return sd_event_exit(sd_event_source_get_event(s), 0);
}
|
||||
|
||||
/* Timer callback for the periodic search: looks for the partition via enumeration. If it is found, the
 * event loop is asked to exit successfully. If it is not there yet (-ENXIO), the timer is re-armed to
 * fire again in 500ms. Any other error is logged and returned. */
static int retry_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        struct wait_data *w = userdata;
        int r;

        assert(w);

        r = find_partition(w->parent_device, w->blkidp, w->timestamp_not_before, w->flags, &w->found);
        if (r == -ENXIO) {
                /* Not there yet, schedule the next attempt */
                r = sd_event_source_set_time_relative(s, 500 * USEC_PER_MSEC);
                if (r < 0)
                        return r;

                return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
        }
        if (r < 0)
                return log_debug_errno(r, "Failed to find partition: %m");

        log_debug("Partition found by a periodic search.");
        return sd_event_exit(sd_event_source_get_event(s), 0);
}
|
||||
|
||||
static int wait_for_partition_device(
|
||||
sd_device *parent,
|
||||
blkid_partition pp,
|
||||
usec_t deadline,
|
||||
uint64_t diskseq,
|
||||
uint64_t uevent_seqnum_not_before,
|
||||
usec_t timestamp_not_before,
|
||||
DissectImageFlags flags,
|
||||
sd_device **ret) {
|
||||
|
||||
_cleanup_(sd_event_source_unrefp) sd_event_source *timeout_source = NULL, *retry_source = NULL;
|
||||
_cleanup_(sd_device_monitor_unrefp) sd_device_monitor *monitor = NULL;
|
||||
_cleanup_(sd_event_unrefp) sd_event *event = NULL;
|
||||
int r;
|
||||
|
||||
assert(parent);
|
||||
assert(pp);
|
||||
assert(ret);
|
||||
|
||||
r = find_partition(parent, pp, timestamp_not_before, flags, ret);
|
||||
if (r != -ENXIO)
|
||||
return r;
|
||||
|
||||
r = sd_event_new(&event);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_monitor_new(&monitor);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_monitor_filter_add_match_subsystem_devtype(monitor, "block", "partition");
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_monitor_filter_add_match_parent(monitor, parent, true);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_monitor_filter_add_match_sysattr(monitor, "partition", NULL, true);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_monitor_attach_event(monitor, event);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
_cleanup_(wait_data_done) struct wait_data w = {
|
||||
.parent_device = parent,
|
||||
.blkidp = pp,
|
||||
.diskseq = diskseq,
|
||||
.uevent_seqnum_not_before = uevent_seqnum_not_before,
|
||||
.timestamp_not_before = timestamp_not_before,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
r = sd_device_monitor_start(monitor, device_monitor_handler, &w);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* Check again, the partition might have appeared in the meantime */
|
||||
r = find_partition(parent, pp, timestamp_not_before, flags, ret);
|
||||
if (r != -ENXIO)
|
||||
return r;
|
||||
|
||||
if (deadline != USEC_INFINITY) {
|
||||
r = sd_event_add_time(
|
||||
event, &timeout_source,
|
||||
CLOCK_MONOTONIC, deadline, 0,
|
||||
timeout_handler, &w);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_event_source_set_exit_on_failure(timeout_source, true);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* If we don't have a disk sequence number then we cannot do exact matching,
|
||||
* and we cannot know if we missed it or if it has not been sent yet, so set
|
||||
* up additional retries to increase the chances of receiving the event. */
|
||||
if (diskseq == 0) {
|
||||
r = sd_event_add_time_relative(
|
||||
event, &retry_source,
|
||||
CLOCK_MONOTONIC, 500 * USEC_PER_MSEC, 0,
|
||||
retry_handler, &w);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_event_source_set_exit_on_failure(retry_source, true);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = sd_event_loop(event);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
assert(w.found);
|
||||
*ret = TAKE_PTR(w.found);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void check_partition_flags(
|
||||
const char *node,
|
||||
unsigned long long pflags,
|
||||
@ -530,77 +148,8 @@ static void check_partition_flags(
|
||||
log_debug("Unexpected partition flag %llu set on %s!", bit, node);
|
||||
}
|
||||
}
|
||||
|
||||
static int device_wait_for_initialization_harder(
|
||||
sd_device *device,
|
||||
const char *subsystem,
|
||||
usec_t deadline,
|
||||
sd_device **ret) {
|
||||
|
||||
usec_t start, left, retrigger_timeout;
|
||||
int r;
|
||||
|
||||
start = now(CLOCK_MONOTONIC);
|
||||
left = usec_sub_unsigned(deadline, start);
|
||||
|
||||
if (DEBUG_LOGGING) {
|
||||
const char *sn = NULL;
|
||||
|
||||
(void) sd_device_get_sysname(device, &sn);
|
||||
log_device_debug(device,
|
||||
"Will wait up to %s for '%s' to initialize…", FORMAT_TIMESPAN(left, 0), strna(sn));
|
||||
}
|
||||
|
||||
if (left != USEC_INFINITY)
|
||||
retrigger_timeout = CLAMP(left / 4, 1 * USEC_PER_SEC, 5 * USEC_PER_SEC); /* A fourth of the total timeout, but let's clamp to 1s…5s range */
|
||||
else
|
||||
retrigger_timeout = 2 * USEC_PER_SEC;
|
||||
|
||||
for (;;) {
|
||||
usec_t local_deadline, n;
|
||||
bool last_try;
|
||||
|
||||
n = now(CLOCK_MONOTONIC);
|
||||
assert(n >= start);
|
||||
|
||||
/* Find next deadline, when we'll retrigger */
|
||||
local_deadline = start +
|
||||
DIV_ROUND_UP(n - start, retrigger_timeout) * retrigger_timeout;
|
||||
|
||||
if (deadline != USEC_INFINITY && deadline <= local_deadline) {
|
||||
local_deadline = deadline;
|
||||
last_try = true;
|
||||
} else
|
||||
last_try = false;
|
||||
|
||||
r = device_wait_for_initialization(device, subsystem, local_deadline, ret);
|
||||
if (r >= 0 && DEBUG_LOGGING) {
|
||||
const char *sn = NULL;
|
||||
|
||||
(void) sd_device_get_sysname(device, &sn);
|
||||
log_device_debug(device,
|
||||
"Successfully waited for device '%s' to initialize for %s.",
|
||||
strna(sn),
|
||||
FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
|
||||
|
||||
}
|
||||
if (r != -ETIMEDOUT || last_try)
|
||||
return r;
|
||||
|
||||
if (DEBUG_LOGGING)
|
||||
log_device_debug(device,
|
||||
"Device didn't initialize within %s, assuming lost event. Retriggering device.",
|
||||
FORMAT_TIMESPAN(usec_sub_unsigned(now(CLOCK_MONOTONIC), start), 0));
|
||||
|
||||
r = sd_device_trigger(device, SD_DEVICE_CHANGE);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#define DEVICE_TIMEOUT_USEC (45 * USEC_PER_SEC)
|
||||
|
||||
static void dissected_partition_done(DissectedPartition *p) {
|
||||
assert(p);
|
||||
|
||||
@ -617,6 +166,62 @@ static void dissected_partition_done(DissectedPartition *p) {
|
||||
};
|
||||
}
|
||||
|
||||
#if HAVE_BLKID
|
||||
static int ioctl_partition_add(
|
||||
int fd,
|
||||
const char *name,
|
||||
int nr,
|
||||
uint64_t start,
|
||||
uint64_t size) {
|
||||
|
||||
assert(fd >= 0);
|
||||
assert(name);
|
||||
assert(nr > 0);
|
||||
|
||||
struct blkpg_partition bp = {
|
||||
.pno = nr,
|
||||
.start = start,
|
||||
.length = size,
|
||||
};
|
||||
|
||||
struct blkpg_ioctl_arg ba = {
|
||||
.op = BLKPG_ADD_PARTITION,
|
||||
.data = &bp,
|
||||
.datalen = sizeof(bp),
|
||||
};
|
||||
|
||||
if (strlen(name) >= sizeof(bp.devname))
|
||||
return -EINVAL;
|
||||
|
||||
strcpy(bp.devname, name);
|
||||
|
||||
return RET_NERRNO(ioctl(fd, BLKPG, &ba));
|
||||
}
|
||||
|
||||
static int make_partition_devname(
|
||||
const char *whole_devname,
|
||||
int nr,
|
||||
char **ret) {
|
||||
|
||||
bool need_p;
|
||||
|
||||
assert(whole_devname);
|
||||
assert(nr > 0);
|
||||
|
||||
/* Given a whole block device node name (e.g. /dev/sda or /dev/loop7) generate a partition device
|
||||
* name (e.g. /dev/sda7 or /dev/loop7p5). The rule the kernel uses is simple: if whole block device
|
||||
* node name ends in a digit, then suffix a 'p', followed by the partition number. Otherwise, just
|
||||
* suffix the partition number without any 'p'. */
|
||||
|
||||
if (isempty(whole_devname)) /* Make sure there *is* a last char */
|
||||
return -EINVAL;
|
||||
|
||||
need_p = strchr(DIGITS, whole_devname[strlen(whole_devname)-1]); /* Last char a digit? */
|
||||
|
||||
return asprintf(ret, "%s%s%i", whole_devname, need_p ? "p" : "", nr);
|
||||
}
|
||||
#endif
|
||||
|
||||
int dissect_image(
|
||||
int fd,
|
||||
const VeritySettings *verity,
|
||||
@ -638,11 +243,10 @@ int dissect_image(
|
||||
_cleanup_(blkid_free_probep) blkid_probe b = NULL;
|
||||
_cleanup_free_ char *generic_node = NULL;
|
||||
sd_id128_t generic_uuid = SD_ID128_NULL;
|
||||
const char *pttype = NULL, *sysname = NULL;
|
||||
const char *pttype = NULL, *sysname = NULL, *devname = NULL;
|
||||
blkid_partlist pl;
|
||||
int r, generic_nr = -1, n_partitions;
|
||||
struct stat st;
|
||||
usec_t deadline;
|
||||
|
||||
assert(fd >= 0);
|
||||
assert(ret);
|
||||
@ -698,23 +302,6 @@ int dissect_image(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (!FLAGS_SET(flags, DISSECT_IMAGE_NO_UDEV)) {
|
||||
_cleanup_(sd_device_unrefp) sd_device *initialized = NULL;
|
||||
|
||||
/* If udev support is enabled, then let's wait for the device to be initialized before we do anything. */
|
||||
|
||||
r = device_wait_for_initialization_harder(
|
||||
d,
|
||||
"block",
|
||||
usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC),
|
||||
&initialized);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
sd_device_unref(d);
|
||||
d = TAKE_PTR(initialized);
|
||||
}
|
||||
|
||||
b = blkid_new_probe();
|
||||
if (!b)
|
||||
return -ENOMEM;
|
||||
@ -770,6 +357,9 @@ int dissect_image(
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
r = sd_device_get_devname(d, &devname);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to get device devname: %m");
|
||||
|
||||
if (!image_name_is_valid(m->image_name)) {
|
||||
log_debug("Image name %s is not valid, ignoring", strempty(m->image_name));
|
||||
@ -785,8 +375,8 @@ int dissect_image(
|
||||
|
||||
(void) blkid_probe_lookup_value(b, "USAGE", &usage, NULL);
|
||||
if (STRPTR_IN_SET(usage, "filesystem", "crypto")) {
|
||||
const char *fstype = NULL, *options = NULL, *devname = NULL;
|
||||
_cleanup_free_ char *t = NULL, *n = NULL, *o = NULL;
|
||||
const char *fstype = NULL, *options = NULL;
|
||||
|
||||
/* OK, we have found a file system, that's our root partition then. */
|
||||
(void) blkid_probe_lookup_value(b, "TYPE", &fstype, NULL);
|
||||
@ -797,10 +387,6 @@ int dissect_image(
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
r = sd_device_get_devname(d, &devname);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
n = strdup(devname);
|
||||
if (!n)
|
||||
return -ENOMEM;
|
||||
@ -873,13 +459,11 @@ int dissect_image(
|
||||
if (n_partitions < 0)
|
||||
return errno_or_else(EIO);
|
||||
|
||||
deadline = usec_add(now(CLOCK_MONOTONIC), DEVICE_TIMEOUT_USEC);
|
||||
for (int i = 0; i < n_partitions; i++) {
|
||||
_cleanup_(sd_device_unrefp) sd_device *q = NULL;
|
||||
_cleanup_free_ char *node = NULL;
|
||||
unsigned long long pflags;
|
||||
blkid_loff_t start, size;
|
||||
blkid_partition pp;
|
||||
const char *node;
|
||||
int nr;
|
||||
|
||||
errno = 0;
|
||||
@ -887,14 +471,6 @@ int dissect_image(
|
||||
if (!pp)
|
||||
return errno_or_else(EIO);
|
||||
|
||||
r = wait_for_partition_device(d, pp, deadline, diskseq, uevent_seqnum_not_before, timestamp_not_before, flags, &q);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = sd_device_get_devname(q, &node);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
pflags = blkid_partition_get_flags(pp);
|
||||
|
||||
errno = 0;
|
||||
@ -916,6 +492,31 @@ int dissect_image(
|
||||
|
||||
assert((uint64_t) size < UINT64_MAX/512);
|
||||
|
||||
r = make_partition_devname(devname, nr, &node);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
/* So here's the thing: after the main ("whole") block device popped up it might take a while
|
||||
* before the kernel fully probed the partition table. Waiting for that to finish is icky in
|
||||
* userspace. So here's what we do instead. We issue the BLKPG_ADD_PARTITION ioctl to add the
|
||||
* partition ourselves, racing against the kernel. Good thing is: if this call fails with
|
||||
* EBUSY then the kernel was quicker than us, and that's totally OK, the outcome is good for
|
||||
* us: the device node will exist. If OTOH our call was successful we won the race. Which is
|
||||
* also good as the outcome is the same: the partition block device exists, and we can use
|
||||
* it.
|
||||
*
|
||||
* Kernel returns EBUSY if there's already a partition by that number or an overlapping
|
||||
* partition already existent. */
|
||||
|
||||
r = ioctl_partition_add(fd, node, nr, (uint64_t) start * 512, (uint64_t) size * 512);
|
||||
if (r < 0) {
|
||||
if (r != -EBUSY)
|
||||
return log_debug_errno(r, "BLKPG_ADD_PARTITION failed: %m");
|
||||
|
||||
log_debug_errno(r, "Kernel was quicker than us in adding partition %i.", nr);
|
||||
} else
|
||||
log_debug("We were quicker than kernel in adding partition %i.", nr);
|
||||
|
||||
if (is_gpt) {
|
||||
PartitionDesignator designator = _PARTITION_DESIGNATOR_INVALID;
|
||||
Architecture architecture = _ARCHITECTURE_INVALID;
|
||||
@ -1447,7 +1048,7 @@ int dissect_image(
|
||||
(flags & DISSECT_IMAGE_GENERIC_ROOT) &&
|
||||
(!verity || !verity->root_hash || verity->designator != PARTITION_USR)) {
|
||||
|
||||
/* OK, we found nothing usable, then check if there's a single generic one distro, and use
|
||||
/* OK, we found nothing usable, then check if there's a single generic partition, and use
|
||||
* that. If the root hash was set however, then we won't fall back to a generic node, because
|
||||
* the root hash decides. */
|
||||
|
||||
@ -3379,6 +2980,11 @@ int mount_image_privately_interactively(
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
|
||||
|
||||
/* Make sure udevd doesn't issue BLKRRPART behind our backs */
|
||||
r = loop_device_flock(d, LOCK_SH);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = dissect_image_and_warn(d->fd, image, &verity, NULL, d->diskseq, d->uevent_seqnum_not_before, d->timestamp_not_before, flags, &dissected_image);
|
||||
if (r < 0)
|
||||
return r;
|
||||
@ -3405,6 +3011,10 @@ int mount_image_privately_interactively(
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
r = loop_device_flock(d, LOCK_UN);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
if (decrypted_image) {
|
||||
r = decrypted_image_relinquish(decrypted_image);
|
||||
if (r < 0)
|
||||
@ -3485,6 +3095,10 @@ int verity_dissect_and_mount(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to create loop device for image: %m");
|
||||
|
||||
r = loop_device_flock(loop_device, LOCK_SH);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to lock loop device: %m");
|
||||
|
||||
r = dissect_image(
|
||||
loop_device->fd,
|
||||
&verity,
|
||||
@ -3532,6 +3146,10 @@ int verity_dissect_and_mount(
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to mount image: %m");
|
||||
|
||||
r = loop_device_flock(loop_device, LOCK_UN);
|
||||
if (r < 0)
|
||||
return log_debug_errno(r, "Failed to unlock loopback device: %m");
|
||||
|
||||
/* If we got os-release values from the caller, then we need to match them with the image's
|
||||
* extension-release.d/ content. Return -EINVAL if there's any mismatch.
|
||||
* First, check the distro ID. If that matches, then check the new SYSEXT_LEVEL value if
|
||||
|
@ -188,19 +188,18 @@ typedef enum DissectImageFlags {
|
||||
DISSECT_IMAGE_MOUNT_NON_ROOT_ONLY = 1 << 7, /* Mount only the non-root and non-/usr partitions */
|
||||
DISSECT_IMAGE_VALIDATE_OS = 1 << 8, /* Refuse mounting images that aren't identifiable as OS images */
|
||||
DISSECT_IMAGE_VALIDATE_OS_EXT = 1 << 9, /* Refuse mounting images that aren't identifiable as OS extension images */
|
||||
DISSECT_IMAGE_NO_UDEV = 1 << 10, /* Don't wait for udev initializing things */
|
||||
DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 11, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
|
||||
DISSECT_IMAGE_FSCK = 1 << 12, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
|
||||
DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 13, /* Only recognize single file system images */
|
||||
DISSECT_IMAGE_VERITY_SHARE = 1 << 14, /* When activating a verity device, reuse existing one if already open */
|
||||
DISSECT_IMAGE_MKDIR = 1 << 15, /* Make top-level directory to mount right before mounting, if missing */
|
||||
DISSECT_IMAGE_USR_NO_ROOT = 1 << 16, /* If no root fs is in the image, but /usr is, then allow this (so that we can mount the rootfs as tmpfs or so */
|
||||
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 17, /* Don't accept disks without root partition (or at least /usr partition if DISSECT_IMAGE_USR_NO_ROOT is set) */
|
||||
DISSECT_IMAGE_MOUNT_READ_ONLY = 1 << 18, /* Make mounts read-only */
|
||||
DISSECT_IMAGE_RELAX_VAR_CHECK = 1 << 10, /* Don't insist that the UUID of /var is hashed from /etc/machine-id */
|
||||
DISSECT_IMAGE_FSCK = 1 << 11, /* File system check the partition before mounting (no effect when combined with DISSECT_IMAGE_READ_ONLY) */
|
||||
DISSECT_IMAGE_NO_PARTITION_TABLE = 1 << 12, /* Only recognize single file system images */
|
||||
DISSECT_IMAGE_VERITY_SHARE = 1 << 13, /* When activating a verity device, reuse existing one if already open */
|
||||
DISSECT_IMAGE_MKDIR = 1 << 14, /* Make top-level directory to mount right before mounting, if missing */
|
||||
DISSECT_IMAGE_USR_NO_ROOT = 1 << 15, /* If no root fs is in the image, but /usr is, then allow this (so that we can mount the rootfs as tmpfs or so */
|
||||
DISSECT_IMAGE_REQUIRE_ROOT = 1 << 16, /* Don't accept disks without root partition (or at least /usr partition if DISSECT_IMAGE_USR_NO_ROOT is set) */
|
||||
DISSECT_IMAGE_MOUNT_READ_ONLY = 1 << 17, /* Make mounts read-only */
|
||||
DISSECT_IMAGE_READ_ONLY = DISSECT_IMAGE_DEVICE_READ_ONLY |
|
||||
DISSECT_IMAGE_MOUNT_READ_ONLY,
|
||||
DISSECT_IMAGE_GROWFS = 1 << 19, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
|
||||
DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 20, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
|
||||
DISSECT_IMAGE_GROWFS = 1 << 18, /* Grow file systems in partitions marked for that to the size of the partitions after mount */
|
||||
DISSECT_IMAGE_MOUNT_IDMAPPED = 1 << 19, /* Mount mounts with kernel 5.12-style userns ID mapping, if file system type doesn't support uid=/gid= */
|
||||
} DissectImageFlags;
|
||||
|
||||
struct DissectedImage {
|
||||
|
@ -533,6 +533,10 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to set up loopback device for %s: %m", img->path);
|
||||
|
||||
r = loop_device_flock(d, LOCK_SH);
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to lock loopback device: %m");
|
||||
|
||||
r = dissect_image_and_warn(
|
||||
d->fd,
|
||||
img->path,
|
||||
|
@ -55,6 +55,9 @@ static void* thread_func(void *ptr) {
|
||||
|
||||
log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
|
||||
|
||||
/* Let's make sure udev doesn't call BLKRRPART in the background, while we try to mount the device. */
|
||||
assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
|
||||
|
||||
r = dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, DISSECT_IMAGE_READ_ONLY, &dissected);
|
||||
if (r < 0)
|
||||
log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
|
||||
@ -85,6 +88,10 @@ static void* thread_func(void *ptr) {
|
||||
log_notice_errno(r, "Mounted %s → %s: %m", loop->node, mounted);
|
||||
assert_se(r >= 0);
|
||||
|
||||
/* Now that the block device is mounted, we no longer need a manual lock; the devices are now
|
||||
* pinned by the mounts. */
|
||||
assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
|
||||
|
||||
log_notice("Unmounting %s", mounted);
|
||||
mounted = umount_and_rmdir_and_free(mounted);
|
||||
|
||||
@ -158,12 +165,6 @@ static int run(int argc, char *argv[]) {
|
||||
return EXIT_TEST_SKIP;
|
||||
}
|
||||
|
||||
if (strstr_ptr(ci_environment(), "autopkgtest") || strstr_ptr(ci_environment(), "github-actions")) {
|
||||
// FIXME: we should reenable this one day
|
||||
log_tests_skipped("Skipping test on Ubuntu autopkgtest CI/GH Actions, test too slow and installed udev too flakey.");
|
||||
return EXIT_TEST_SKIP;
|
||||
}
|
||||
|
||||
/* This is a test for the loopback block device setup code and its use by the image dissection
|
||||
* logic: since the kernel APIs are hard to use and prone to races, let's test this in a heavy duty
|
||||
* test: we open a bunch of threads and repeatedly allocate and deallocate loopback block devices in
|
||||
@ -221,6 +222,11 @@ static int run(int argc, char *argv[]) {
|
||||
pthread_t threads[arg_n_threads];
|
||||
sd_id128_t id;
|
||||
|
||||
/* Take an explicit lock while we format the file systems, in accordance with
|
||||
* https://systemd.io/BLOCK_DEVICE_LOCKING/. We don't want udev to interfere and probe while we write
|
||||
* or even issue BLKRRPART or similar while we are working on this. */
|
||||
assert_se(loop_device_flock(loop, LOCK_EX) >= 0);
|
||||
|
||||
assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
|
||||
|
||||
assert_se(dissected->partitions[PARTITION_ESP].found);
|
||||
@ -249,9 +255,21 @@ static int run(int argc, char *argv[]) {
|
||||
|
||||
assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
|
||||
|
||||
/* We are particularly correct here, and now downgrade LOCK_EX → LOCK_SH. That's because we are done
|
||||
* with formatting the file systems, so we don't need the exclusive lock anymore. From now on a
|
||||
* shared one is fine. This way udev can now probe the device if it wants, but still won't call
|
||||
* BLKRRPART on it, and that's good, because that would destroy our partition table while we are at
|
||||
* it. */
|
||||
assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
|
||||
|
||||
/* This first (writable) mount will initialize the mount point dirs, so that the subsequent read-only ones can work */
|
||||
assert_se(dissected_image_mount(dissected, mounted, UID_INVALID, UID_INVALID, 0) >= 0);
|
||||
|
||||
/* Now we mounted everything, the partitions are pinned. Now it's fine to release the lock
|
||||
* fully. This means udev could now issue BLKRRPART again, but that's OK given this will fail because
|
||||
* we now mounted the device. */
|
||||
assert_se(loop_device_flock(loop, LOCK_UN) >= 0);
|
||||
|
||||
assert_se(umount_recursive(mounted, 0) >= 0);
|
||||
loop = loop_device_unref(loop);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user