1
0
mirror of https://github.com/systemd/systemd.git synced 2025-01-11 09:18:07 +03:00

Merge pull request #24530 from poettering/loop-with-more-lock

a variety of loopback block device fixes
This commit is contained in:
Luca Boccassi 2022-09-02 00:04:32 +01:00 committed by GitHub
commit dddec402de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 189 additions and 246 deletions

View File

@ -2058,16 +2058,11 @@ int setup_namespace(
root_image,
FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : -1 /* < 0 means writable if possible, read-only as fallback */,
FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&loop_device);
if (r < 0)
return log_debug_errno(r, "Failed to create loop device for root image: %m");
/* Make sure udevd won't issue BLKRRPART (which might flush out the loaded partition table)
* while we are still trying to mount things */
r = loop_device_flock(loop_device, LOCK_SH);
if (r < 0)
return log_debug_errno(r, "Failed to lock loopback device with LOCK_SH: %m");
r = dissect_image(
loop_device->fd,
&verity,
@ -2432,7 +2427,6 @@ int setup_namespace(
}
}
dissected_image_relinquish(dissected_image);
loop_device_relinquish(loop_device);
} else if (root_directory) {

View File

@ -685,7 +685,6 @@ static int action_mount(DissectedImage *m, LoopDevice *d) {
return log_error_errno(r, "Failed to relinquish DM devices: %m");
}
dissected_image_relinquish(m);
loop_device_relinquish(d);
return 0;
}
@ -738,7 +737,6 @@ static int action_copy(DissectedImage *m, LoopDevice *d) {
return log_error_errno(r, "Failed to relinquish DM devices: %m");
}
dissected_image_relinquish(m);
loop_device_relinquish(d);
if (arg_action == ACTION_COPY_FROM) {
@ -864,7 +862,7 @@ static int action_umount(const char *path) {
_cleanup_free_ char *canonical = NULL, *devname = NULL;
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
dev_t devno;
int r, k;
int r;
fd = chase_symlinks_and_open(path, NULL, 0, O_DIRECTORY, &canonical);
if (fd == -ENOTDIR)
@ -887,14 +885,10 @@ static int action_umount(const char *path) {
return log_error_errno(r, "Failed to get devname of block device " DEVNUM_FORMAT_STR ": %m",
DEVNUM_FORMAT_VAL(devno));
r = loop_device_open(devname, 0, &d);
r = loop_device_open(devname, 0, LOCK_EX, &d);
if (r < 0)
return log_error_errno(r, "Failed to open loop device '%s': %m", devname);
r = loop_device_flock(d, LOCK_EX);
if (r < 0)
return log_error_errno(r, "Failed to lock loop device '%s': %m", devname);
/* We've locked the loop device, now we're ready to unmount. To allow the unmount to succeed, we have
* to close the O_PATH fd we opened earlier. */
fd = safe_close(fd);
@ -907,25 +901,12 @@ static int action_umount(const char *path) {
loop_device_unrelinquish(d);
if (arg_rmdir) {
k = RET_NERRNO(rmdir(canonical));
if (k < 0)
log_error_errno(k, "Failed to remove mount directory '%s': %m", canonical);
} else
k = 0;
/* Before loop_device_unrefp() kicks in, let's explicitly remove all the partition subdevices of the
* loop device. We do this to ensure that all traces of the loop device are gone by the time this
* command exits. */
r = block_device_remove_all_partitions(d->fd);
if (r == -EBUSY) {
log_error_errno(r, "One or more partitions of '%s' are busy, ignoring", devname);
r = 0;
r = RET_NERRNO(rmdir(canonical));
if (r < 0)
return log_error_errno(r, "Failed to remove mount directory '%s': %m", canonical);
}
if (r < 0)
log_error_errno(r, "Failed to remove one or more partitions of '%s': %m", devname);
return k < 0 ? k : r;
return 0;
}
static int run(int argc, char *argv[]) {
@ -959,16 +940,11 @@ static int run(int argc, char *argv[]) {
arg_image,
FLAGS_SET(arg_flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : O_RDWR,
FLAGS_SET(arg_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&d);
if (r < 0)
return log_error_errno(r, "Failed to set up loopback device for %s: %m", arg_image);
/* Make sure udevd doesn't issue BLKRRPART underneath us thus making devices disappear in the middle,
* that we assume already are there. */
r = loop_device_flock(d, LOCK_SH);
if (r < 0)
return log_error_errno(r, "Failed to lock loopback device: %m");
r = dissect_image_and_warn(
d->fd,
arg_image,

View File

@ -772,8 +772,6 @@ static int enumerate_partitions(dev_t devnum) {
r = k;
}
dissected_image_relinquish(m);
return r;
}

View File

@ -1284,7 +1284,7 @@ int home_setup_luks(
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Failed to determine backing device for DM %s.", setup->dm_name);
if (!setup->loop) {
r = loop_device_open(n, O_RDWR, &setup->loop);
r = loop_device_open(n, O_RDWR, LOCK_UN, &setup->loop);
if (r < 0)
return log_error_errno(r, "Failed to open loopback device %s: %m", n);
}
@ -1378,7 +1378,7 @@ int home_setup_luks(
return r;
}
r = loop_device_make(setup->image_fd, O_RDWR, offset, size, 0, &setup->loop);
r = loop_device_make(setup->image_fd, O_RDWR, offset, size, 0, LOCK_UN, &setup->loop);
if (r == -ENOENT) {
log_error_errno(r, "Loopback block device support is not available on this system.");
return -ENOLINK; /* make recognizable */
@ -2299,7 +2299,7 @@ int home_create_luks(
log_info("Writing of partition table completed.");
r = loop_device_make(setup->image_fd, O_RDWR, partition_offset, partition_size, 0, &setup->loop);
r = loop_device_make(setup->image_fd, O_RDWR, partition_offset, partition_size, 0, LOCK_EX, &setup->loop);
if (r < 0) {
if (r == -ENOENT) { /* this means /dev/loop-control doesn't exist, i.e. we are in a container
* or similar and loopback bock devices are not available, return a
@ -2311,10 +2311,6 @@ int home_create_luks(
return log_error_errno(r, "Failed to set up loopback device for %s: %m", setup->temporary_image_path);
}
r = loop_device_flock(setup->loop, LOCK_EX); /* make sure udev won't read before we are done */
if (r < 0)
return log_error_errno(r, "Failed to take lock on loop device: %m");
log_info("Setting up loopback device %s completed.", setup->loop->node ?: ip);
r = luks_format(setup->loop->node,

View File

@ -5746,19 +5746,13 @@ static int run(int argc, char *argv[]) {
arg_image,
arg_read_only ? O_RDONLY : O_RDWR,
FLAGS_SET(dissect_image_flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&loop);
if (r < 0) {
log_error_errno(r, "Failed to set up loopback block device: %m");
goto finish;
}
/* Take a LOCK_SH lock on the device, so that udevd doesn't issue BLKRRPART in our back */
r = loop_device_flock(loop, LOCK_SH);
if (r < 0) {
log_error_errno(r, "Failed to take lock on loopback block device: %m");
goto finish;
}
r = dissect_image_and_warn(
loop->fd,
arg_image,

View File

@ -2789,14 +2789,10 @@ static int context_copy_blocks(Context *context) {
assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0);
if (p->encrypt != ENCRYPT_OFF) {
r = loop_device_make(whole_fd, O_RDWR, p->offset, p->new_size, 0, &d);
r = loop_device_make(whole_fd, O_RDWR, p->offset, p->new_size, 0, LOCK_EX, &d);
if (r < 0)
return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
r = loop_device_flock(d, LOCK_EX);
if (r < 0)
return log_error_errno(r, "Failed to lock loopback device: %m");
r = partition_encrypt(context, p, d->node, &cd, &encrypted, &encrypted_dev_fd);
if (r < 0)
return log_error_errno(r, "Failed to encrypt device: %m");
@ -3038,14 +3034,10 @@ static int context_mkfs(Context *context) {
/* Loopback block devices are not only useful to turn regular files into block devices, but
* also to cut out sections of block devices into new block devices. */
r = loop_device_make(fd, O_RDWR, p->offset, p->new_size, 0, &d);
r = loop_device_make(fd, O_RDWR, p->offset, p->new_size, 0, LOCK_EX, &d);
if (r < 0)
return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno);
r = loop_device_flock(d, LOCK_EX);
if (r < 0)
return log_error_errno(r, "Failed to lock loopback device: %m");
if (p->encrypt != ENCRYPT_OFF) {
r = partition_encrypt(context, p, d->node, &cd, &encrypted, &encrypted_dev_fd);
if (r < 0)

View File

@ -333,7 +333,7 @@ static int portable_extract_by_path(
assert(path);
r = loop_device_make_by_path(path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
r = loop_device_make_by_path(path, O_RDONLY, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
if (r == -EISDIR) {
_cleanup_free_ char *image_name = NULL;
@ -359,10 +359,6 @@ static int portable_extract_by_path(
/* We now have a loopback block device, let's fork off a child in its own mount namespace, mount it
* there, and extract the metadata we need. The metadata is sent from the child back to us. */
r = loop_device_flock(d, LOCK_SH);
if (r < 0)
return log_debug_errno(r, "Failed to acquire lock on loopback block device: %m");
BLOCK_SIGNALS(SIGCHLD);
r = mkdtemp_malloc("/tmp/inspect-XXXXXX", &tmpdir);

View File

@ -1192,13 +1192,7 @@ int image_read_metadata(Image *i) {
_cleanup_(loop_device_unrefp) LoopDevice *d = NULL;
_cleanup_(dissected_image_unrefp) DissectedImage *m = NULL;
r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, &d);
if (r < 0)
return r;
/* Make sure udevd doesn't issue BLKRRPART in the background which might make our partitions
* disappear temporarily. */
r = loop_device_flock(d, LOCK_SH);
r = loop_device_make_by_path(i->path, O_RDONLY, LO_FLAGS_PARTSCAN, LOCK_SH, &d);
if (r < 0)
return r;

View File

@ -150,20 +150,9 @@ static void check_partition_flags(
}
#endif
static void dissected_partition_done(int fd, DissectedPartition *p) {
assert(fd >= 0);
static void dissected_partition_done(DissectedPartition *p) {
assert(p);
#if HAVE_BLKID
if (p->node && p->partno > 0 && !p->relinquished) {
int r;
r = block_device_remove_partition(fd, p->node, p->partno);
if (r < 0)
log_debug_errno(r, "BLKPG_DEL_PARTITION failed, ignoring: %m");
}
#endif
free(p->fstype);
free(p->node);
free(p->label);
@ -312,14 +301,9 @@ int dissect_image(
return -ENOMEM;
*m = (DissectedImage) {
.fd = -1,
.has_init_system = -1,
};
m->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
if (m->fd < 0)
return -errno;
r = sd_device_get_sysname(d, &sysname);
if (r < 0)
return log_debug_errno(r, "Failed to get device sysname: %m");
@ -775,14 +759,10 @@ int dissect_image(
* scheme in OS images. */
if (!PARTITION_DESIGNATOR_VERSIONED(designator) ||
strverscmp_improved(m->partitions[designator].label, label) >= 0) {
r = block_device_remove_partition(fd, node, nr);
if (r < 0)
log_debug_errno(r, "BLKPG_DEL_PARTITION failed, ignoring: %m");
strverscmp_improved(m->partitions[designator].label, label) >= 0)
continue;
}
dissected_partition_done(fd, m->partitions + designator);
dissected_partition_done(m->partitions + designator);
}
if (fstype) {
@ -852,12 +832,8 @@ int dissect_image(
const char *sid, *options = NULL;
/* First one wins */
if (m->partitions[PARTITION_XBOOTLDR].found) {
r = block_device_remove_partition(fd, node, nr);
if (r < 0)
log_debug_errno(r, "BLKPG_DEL_PARTITION failed, ignoring: %m");
if (m->partitions[PARTITION_XBOOTLDR].found)
continue;
}
sid = blkid_partition_get_uuid(pp);
if (sid)
@ -1171,9 +1147,8 @@ DissectedImage* dissected_image_unref(DissectedImage *m) {
return NULL;
for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++)
dissected_partition_done(m->fd, m->partitions + i);
dissected_partition_done(m->partitions + i);
safe_close(m->fd);
free(m->image_name);
free(m->hostname);
strv_free(m->machine_info);
@ -1183,16 +1158,6 @@ DissectedImage* dissected_image_unref(DissectedImage *m) {
return mfree(m);
}
void dissected_image_relinquish(DissectedImage *m) {
assert(m);
/* Partitions are automatically removed when the underlying loop device is closed. We just need to
* make sure we don't try to remove the partitions early. */
for (PartitionDesignator i = 0; i < _PARTITION_DESIGNATOR_MAX; i++)
m->partitions[i].relinquished = true;
}
static int is_loop_device(const char *path) {
char s[SYS_BLOCK_PATH_MAX("/../loop/")];
struct stat st;
@ -3001,15 +2966,11 @@ int mount_image_privately_interactively(
image,
FLAGS_SET(flags, DISSECT_IMAGE_DEVICE_READ_ONLY) ? O_RDONLY : O_RDWR,
FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&d);
if (r < 0)
return log_error_errno(r, "Failed to set up loopback device for %s: %m", image);
/* Make sure udevd doesn't issue BLKRRPART behind our backs */
r = loop_device_flock(d, LOCK_SH);
if (r < 0)
return r;
r = dissect_image_and_warn(d->fd, image, &verity, NULL, d->diskseq, d->uevent_seqnum_not_before, d->timestamp_not_before, flags, &dissected_image);
if (r < 0)
return r;
@ -3046,7 +3007,6 @@ int mount_image_privately_interactively(
return log_error_errno(r, "Failed to relinquish DM devices: %m");
}
dissected_image_relinquish(dissected_image);
loop_device_relinquish(d);
*ret_directory = TAKE_PTR(created_dir);
@ -3117,14 +3077,11 @@ int verity_dissect_and_mount(
src_fd >= 0 ? FORMAT_PROC_FD_PATH(src_fd) : src,
-1,
verity.data_path ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&loop_device);
if (r < 0)
return log_debug_errno(r, "Failed to create loop device for image: %m");
r = loop_device_flock(loop_device, LOCK_SH);
if (r < 0)
return log_debug_errno(r, "Failed to lock loop device: %m");
r = dissect_image(
loop_device->fd,
&verity,
@ -3209,7 +3166,6 @@ int verity_dissect_and_mount(
return log_debug_errno(r, "Failed to relinquish decrypted image: %m");
}
dissected_image_relinquish(dissected_image);
loop_device_relinquish(loop_device);
return 0;

View File

@ -31,7 +31,6 @@ struct DissectedPartition {
char *mount_options;
uint64_t size;
uint64_t offset;
bool relinquished;
};
typedef enum PartitionDesignator {
@ -204,8 +203,6 @@ typedef enum DissectImageFlags {
} DissectImageFlags;
struct DissectedImage {
int fd; /* Backing fd */
bool encrypted:1;
bool has_verity:1; /* verity available in image, but not necessarily used */
bool has_verity_sig:1; /* pkcs#7 signature embedded in image */
@ -261,7 +258,6 @@ int dissect_image_and_warn(int fd, const char *name, const VeritySettings *verit
DissectedImage* dissected_image_unref(DissectedImage *m);
DEFINE_TRIVIAL_CLEANUP_FUNC(DissectedImage*, dissected_image_unref);
void dissected_image_relinquish(DissectedImage *m);
int dissected_image_decrypt(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);
int dissected_image_decrypt_interactively(DissectedImage *m, const char *passphrase, const VeritySettings *verity, DissectImageFlags flags, DecryptedImage **ret);

View File

@ -117,13 +117,28 @@ static int device_has_block_children(sd_device *d) {
return !!sd_device_enumerator_get_device_first(e);
}
static int open_lock_fd(int primary_fd, int operation) {
int lock_fd;
assert(primary_fd >= 0);
lock_fd = fd_reopen(primary_fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (lock_fd < 0)
return lock_fd;
if (flock(lock_fd, operation) < 0)
return -errno;
return lock_fd;
}
static int loop_configure(
int fd,
int nr,
const struct loop_config *c,
bool *try_loop_configure,
uint64_t *ret_seqnum_not_before,
usec_t *ret_timestamp_not_before) {
usec_t *ret_timestamp_not_before,
int *ret_lock_fd) {
_cleanup_(sd_device_unrefp) sd_device *d = NULL;
_cleanup_free_ char *sysname = NULL;
@ -152,18 +167,15 @@ static int loop_configure(
* long time udev would possibly never run on it again, even though the fd is unlocked, simply
* because we never close() it. It also has the nice benefit we can use the _cleanup_close_ logic to
* automatically release the lock, after we are done. */
lock_fd = fd_reopen(fd, O_RDWR|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
lock_fd = open_lock_fd(fd, LOCK_EX);
if (lock_fd < 0)
return lock_fd;
if (flock(lock_fd, LOCK_EX) < 0)
return -errno;
/* Let's see if the device is really detached, i.e. currently has no associated partition block
* devices. On various kernels (such as 5.8) it is possible to have a loopback block device that
* superficially is detached but still has partition block devices associated for it. They only go
* away when the device is reattached. (Yes, LOOP_CLR_FD doesn't work then, because officially
* nothing is attached and LOOP_CTL_REMOVE doesn't either, since it doesn't care about partition
* block devices. */
* superficially is detached but still has partition block devices associated for it. Let's then
* manually remove the partitions via BLKPG, and tell the caller we did that via EUCLEAN, so they try
* again. */
r = device_has_block_children(d);
if (r < 0)
return r;
@ -174,8 +186,14 @@ static int loop_configure(
if (r > 0)
return -EBUSY;
return -EUCLEAN; /* Bound but children? Tell caller to reattach something so that the
* partition block devices are gone too. */
/* Unbound but has children? Remove all partitions, and report this to the caller, to try
* again, and count this as an attempt. */
r = block_device_remove_all_partitions(fd);
if (r < 0)
return r;
return -EUCLEAN;
}
if (*try_loop_configure) {
@ -247,12 +265,7 @@ static int loop_configure(
goto fail;
}
if (ret_seqnum_not_before)
*ret_seqnum_not_before = seqnum;
if (ret_timestamp_not_before)
*ret_timestamp_not_before = timestamp;
return 0;
goto success;
}
}
@ -317,65 +330,34 @@ static int loop_configure(
log_debug_errno(errno, "Failed to enable direct IO mode on loopback device /dev/loop%i, ignoring: %m", nr);
}
success:
if (ret_seqnum_not_before)
*ret_seqnum_not_before = seqnum;
if (ret_timestamp_not_before)
*ret_timestamp_not_before = timestamp;
if (ret_lock_fd)
*ret_lock_fd = TAKE_FD(lock_fd);
return 0;
fail:
/* Close the lock fd explicitly before clearing the loopback block device, since an additional open
* fd would block the clearing to succeed */
lock_fd = safe_close(lock_fd);
(void) ioctl(fd, LOOP_CLR_FD);
return r;
}
static int attach_empty_file(int loop, int nr) {
_cleanup_close_ int fd = -1;
/* So here's the thing: on various kernels (5.8 at least) loop block devices might enter a state
* where they are detached but nonetheless have partitions, when used heavily. Accessing these
* partitions results in immediatey IO errors. There's no pretty way to get rid of them
* again. Neither LOOP_CLR_FD nor LOOP_CTL_REMOVE suffice (see above). What does work is to
* reassociate them with a new fd however. This is what we do here hence: we associate the devices
* with an empty file (i.e. an image that definitely has no partitions). We then immediately clear it
* again. This suffices to make the partitions go away. Ugly but appears to work. */
log_debug("Found unattached loopback block device /dev/loop%i with partitions. Attaching empty file to remove them.", nr);
fd = open_tmpfile_unlinkable(NULL, O_RDONLY);
if (fd < 0)
return fd;
if (flock(loop, LOCK_EX) < 0)
return -errno;
if (ioctl(loop, LOOP_SET_FD, fd) < 0)
return -errno;
if (ioctl(loop, LOOP_SET_STATUS64, &(struct loop_info64) {
.lo_flags = LO_FLAGS_READ_ONLY|
LO_FLAGS_AUTOCLEAR|
LO_FLAGS_PARTSCAN, /* enable partscan, so that the partitions really go away */
}) < 0)
return -errno;
if (ioctl(loop, LOOP_CLR_FD) < 0)
return -errno;
/* The caller is expected to immediately close the loopback device after this, so that the BSD lock
* is released, and udev sees the changes. */
return 0;
}
static int loop_device_make_internal(
int fd,
int open_flags,
uint64_t offset,
uint64_t size,
uint32_t loop_flags,
int lock_op,
LoopDevice **ret) {
_cleanup_close_ int direct_io_fd = -1;
_cleanup_close_ int direct_io_fd = -1, lock_fd = -1;
_cleanup_free_ char *loopdev = NULL;
bool try_loop_configure = true;
struct loop_config config;
@ -393,6 +375,12 @@ static int loop_device_make_internal(
return -errno;
if (S_ISBLK(st.st_mode)) {
if (lock_op != LOCK_UN) {
lock_fd = open_lock_fd(fd, lock_op);
if (lock_fd < 0)
return lock_fd;
}
if (ioctl(fd, LOOP_GET_STATUS64, &config.info) >= 0) {
/* Oh! This is a loopback device? That's interesting! */
@ -430,6 +418,7 @@ static int loop_device_make_internal(
return -ENOMEM;
*d = (LoopDevice) {
.fd = TAKE_FD(copy),
.lock_fd = TAKE_FD(lock_fd),
.nr = nr,
.node = TAKE_PTR(loopdev),
.relinquished = true, /* It's not allocated by us, don't destroy it when this object is freed */
@ -501,7 +490,10 @@ static int loop_device_make_internal(
* unnecessary busywork less likely. Note that this is just something we do to optimize our
* own code (and whoever else decides to use LOCK_EX locks for this), taking this lock is not
* necessary, it just means it's less likely we have to iterate through this loop again and
* again if our own code races against our own code. */
* again if our own code races against our own code.
*
* Note: our lock protocol is to take the /dev/loop-control lock first, and the block device
* lock second, if both are taken, and always in this order, to avoid ABBA locking issues. */
if (flock(control, LOCK_EX) < 0)
return -errno;
@ -519,17 +511,13 @@ static int loop_device_make_internal(
if (!ERRNO_IS_DEVICE_ABSENT(errno))
return -errno;
} else {
r = loop_configure(loop, nr, &config, &try_loop_configure, &seqnum, &timestamp);
r = loop_configure(loop, nr, &config, &try_loop_configure, &seqnum, &timestamp, &lock_fd);
if (r >= 0) {
loop_with_fd = TAKE_FD(loop);
break;
}
if (r == -EUCLEAN) {
/* Make left-over partition disappear hack (see above) */
r = attach_empty_file(loop, nr);
if (r < 0 && r != -EBUSY)
return r;
} else if (r != -EBUSY)
if (!IN_SET(r, -EBUSY, -EUCLEAN)) /* Busy, or some left-over partition devices that
* were cleaned up. */
return r;
}
@ -583,11 +571,26 @@ static int loop_device_make_internal(
if (r < 0 && r != -EOPNOTSUPP)
return r;
switch (lock_op & ~LOCK_NB) {
case LOCK_EX: /* Already in effect */
break;
case LOCK_SH: /* Downgrade */
if (flock(lock_fd, lock_op) < 0)
return -errno;
break;
case LOCK_UN: /* Release */
lock_fd = safe_close(lock_fd);
break;
default:
assert_not_reached();
}
d = new(LoopDevice, 1);
if (!d)
return -ENOMEM;
*d = (LoopDevice) {
.fd = TAKE_FD(loop_with_fd),
.lock_fd = TAKE_FD(lock_fd),
.node = TAKE_PTR(loopdev),
.nr = nr,
.devno = st.st_rdev,
@ -622,6 +625,7 @@ int loop_device_make(
uint64_t offset,
uint64_t size,
uint32_t loop_flags,
int lock_op,
LoopDevice **ret) {
assert(fd >= 0);
@ -633,6 +637,7 @@ int loop_device_make(
offset,
size,
loop_flags_mangle(loop_flags),
lock_op,
ret);
}
@ -640,6 +645,7 @@ int loop_device_make_by_path(
const char *path,
int open_flags,
uint32_t loop_flags,
int lock_op,
LoopDevice **ret) {
int r, basic_flags, direct_flags, rdwr_flags;
@ -693,46 +699,74 @@ int loop_device_make_by_path(
direct ? "enabled" : "disabled",
direct != (direct_flags != 0) ? " (O_DIRECT was requested but not supported)" : "");
return loop_device_make_internal(fd, open_flags, 0, 0, loop_flags, ret);
return loop_device_make_internal(fd, open_flags, 0, 0, loop_flags, lock_op, ret);
}
LoopDevice* loop_device_unref(LoopDevice *d) {
_cleanup_close_ int control = -1;
int r;
if (!d)
return NULL;
/* Release any lock we might have on the device first. We want to open+lock the /dev/loop-control
* device below, but our lock protocol says that if both control and block device locks are taken,
* the control lock needs to be taken first, the block device lock second in order to avoid ABBA
* locking issues. Moreover, we want to issue LOOP_CLR_FD on the block device further down, and that
* would fail if we had another fd open to the device. */
d->lock_fd = safe_close(d->lock_fd);
/* Let's open the control device early, and lock it, so that we can release our block device and
* delete it in a synchronized fashion, and allocators won't needlessly see the block device as free
* while we are about to delete it. */
if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
if (control < 0)
log_debug_errno(errno, "Failed to open loop control device, cannot remove loop device '%s', ignoring: %m", strna(d->node));
else if (flock(control, LOCK_EX) < 0)
log_debug_errno(errno, "Failed to lock loop control device, ignoring: %m");
}
/* Then let's release the loopback block device */
if (d->fd >= 0) {
/* Implicitly sync the device, since otherwise in-flight blocks might not get written */
if (fsync(d->fd) < 0)
log_debug_errno(errno, "Failed to sync loop block device, ignoring: %m");
if (d->nr >= 0 && !d->relinquished) {
if (ioctl(d->fd, LOOP_CLR_FD) < 0)
log_debug_errno(errno, "Failed to clear loop device: %m");
if (!LOOP_DEVICE_IS_FOREIGN(d) && !d->relinquished) {
/* We are supposed to clear the loopback device. Let's do this synchronously: lock
* the device, manually remove all partitions and then clear it. This should ensure
* udev doesn't concurrently access the devices, and we can be reasonably sure that
* once we are done here the device is cleared and all its partition children
* removed. Note that we lock our primary device fd here (and not a separate locking
* fd, as we do during allocation, since we want to keep the lock all the way through
* the LOOP_CLR_FD, but that call would fail if we had more than one fd open.) */
if (flock(d->fd, LOCK_EX) < 0)
log_debug_errno(errno, "Failed to lock loop block device, ignoring: %m");
r = block_device_remove_all_partitions(d->fd);
if (r < 0)
log_debug_errno(r, "Failed to remove partitions of loopback block device, ignoring: %m");
if (ioctl(d->fd, LOOP_CLR_FD) < 0)
log_debug_errno(errno, "Failed to clear loop device, ignoring: %m");
}
safe_close(d->fd);
}
if (d->nr >= 0 && !d->relinquished) {
_cleanup_close_ int control = -1;
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
if (control < 0)
log_warning_errno(errno,
"Failed to open loop control device, cannot remove loop device %s: %m",
strna(d->node));
else
for (unsigned n_attempts = 0;;) {
if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
break;
if (errno != EBUSY || ++n_attempts >= 64) {
log_warning_errno(errno, "Failed to remove device %s: %m", strna(d->node));
break;
}
(void) usleep(50 * USEC_PER_MSEC);
/* Now that the block device is released, let's also try to remove it */
if (control >= 0)
for (unsigned n_attempts = 0;;) {
if (ioctl(control, LOOP_CTL_REMOVE, d->nr) >= 0)
break;
if (errno != EBUSY || ++n_attempts >= 64) {
log_debug_errno(errno, "Failed to remove device %s: %m", strna(d->node));
break;
}
}
(void) usleep(50 * USEC_PER_MSEC);
}
free(d->node);
return mfree(d);
@ -752,8 +786,13 @@ void loop_device_unrelinquish(LoopDevice *d) {
d->relinquished = false;
}
int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
_cleanup_close_ int loop_fd = -1;
int loop_device_open(
const char *loop_path,
int open_flags,
int lock_op,
LoopDevice **ret) {
_cleanup_close_ int loop_fd = -1, lock_fd = -1;
_cleanup_free_ char *p = NULL;
struct loop_info64 info;
struct stat st;
@ -782,6 +821,12 @@ int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
} else
nr = -1;
if ((lock_op & ~LOCK_NB) != LOCK_UN) {
lock_fd = open_lock_fd(loop_fd, lock_op);
if (lock_fd < 0)
return lock_fd;
}
p = strdup(loop_path);
if (!p)
return -ENOMEM;
@ -792,6 +837,7 @@ int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret) {
*d = (LoopDevice) {
.fd = TAKE_FD(loop_fd),
.lock_fd = TAKE_FD(lock_fd),
.nr = nr,
.node = TAKE_PTR(p),
.relinquished = true, /* It's not ours, don't try to destroy it when this object is freed */
@ -878,7 +924,9 @@ static int resize_partition(int partition_fd, uint64_t offset, uint64_t size) {
int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
struct loop_info64 info;
assert(d);
assert(d->fd >= 0);
/* Changes the offset/start of the loop device relative to the beginning of the underlying file or
* block device. If this loop device actually refers to a partition and not a loopback device, we'll
@ -886,9 +934,6 @@ int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
*
* If either offset or size is UINT64_MAX we won't change that parameter. */
if (d->fd < 0)
return -EBADF;
if (d->nr < 0) /* not a loopback device */
return resize_partition(d->fd, offset, size);
@ -914,22 +959,36 @@ int loop_device_refresh_size(LoopDevice *d, uint64_t offset, uint64_t size) {
}
int loop_device_flock(LoopDevice *d, int operation) {
assert(IN_SET(operation & ~LOCK_NB, LOCK_UN, LOCK_SH, LOCK_EX));
assert(d);
if (d->fd < 0)
return -EBADF;
/* When unlocking just close the lock fd */
if ((operation & ~LOCK_NB) == LOCK_UN) {
d->lock_fd = safe_close(d->lock_fd);
return 0;
}
return RET_NERRNO(flock(d->fd, operation));
/* If we had no lock fd so far, create one and lock it right-away */
if (d->lock_fd < 0) {
assert(d->fd >= 0);
d->lock_fd = open_lock_fd(d->fd, operation);
if (d->lock_fd < 0)
return d->lock_fd;
return 0;
}
/* Otherwise change the current lock mode on the existing fd */
return RET_NERRNO(flock(d->lock_fd, operation));
}
int loop_device_sync(LoopDevice *d) {
assert(d);
assert(d->fd >= 0);
/* We also do this implicitly in loop_device_unref(). Doing this explicitly here has the benefit that
* we can check the return value though. */
if (d->fd < 0)
return -EBADF;
return RET_NERRNO(fsync(d->fd));
}

View File

@ -10,7 +10,8 @@ typedef struct LoopDevice LoopDevice;
struct LoopDevice {
int fd;
int nr;
int lock_fd;
int nr; /* The loopback device index (i.e. 4 for /dev/loop4); if this object encapsulates a non-loopback block device, set to -1 */
dev_t devno;
char *node;
bool relinquished;
@ -19,9 +20,12 @@ struct LoopDevice {
usec_t timestamp_not_before; /* CLOCK_MONOTONIC timestamp taken immediately before attaching the loopback device, or USEC_INFINITY if we don't know */
};
int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, LoopDevice **ret);
int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, LoopDevice **ret);
int loop_device_open(const char *loop_path, int open_flags, LoopDevice **ret);
/* Returns true if LoopDevice object is not actually a loopback device but some other block device we just wrap */
#define LOOP_DEVICE_IS_FOREIGN(d) ((d)->nr < 0)
int loop_device_make(int fd, int open_flags, uint64_t offset, uint64_t size, uint32_t loop_flags, int lock_op, LoopDevice **ret);
int loop_device_make_by_path(const char *path, int open_flags, uint32_t loop_flags, int lock_op, LoopDevice **ret);
int loop_device_open(const char *loop_path, int open_flags, int lock_op, LoopDevice **ret);
LoopDevice* loop_device_unref(LoopDevice *d);
DEFINE_TRIVIAL_CLEANUP_FUNC(LoopDevice*, loop_device_unref);

View File

@ -534,14 +534,11 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
img->path,
O_RDONLY,
FLAGS_SET(flags, DISSECT_IMAGE_NO_PARTITION_TABLE) ? 0 : LO_FLAGS_PARTSCAN,
LOCK_SH,
&d);
if (r < 0)
return log_error_errno(r, "Failed to set up loopback device for %s: %m", img->path);
r = loop_device_flock(d, LOCK_SH);
if (r < 0)
return log_error_errno(r, "Failed to lock loopback device: %m");
r = dissect_image_and_warn(
d->fd,
img->path,
@ -585,7 +582,6 @@ static int merge_subprocess(Hashmap *images, const char *workspace) {
return log_error_errno(r, "Failed to relinquish DM devices: %m");
}
dissected_image_relinquish(m);
loop_device_relinquish(d);
break;
}

View File

@ -49,16 +49,13 @@ static void* thread_func(void *ptr) {
assert_se(mkdtemp_malloc(NULL, &mounted) >= 0);
r = loop_device_make(fd, O_RDONLY, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop);
r = loop_device_make(fd, O_RDONLY, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, LOCK_SH, &loop);
if (r < 0)
log_error_errno(r, "Failed to allocate loopback device: %m");
assert_se(r >= 0);
log_notice("Acquired loop device %s, will mount on %s", loop->node, mounted);
/* Let's make sure udev doesn't call BLKRRPART in the background, while we try to mount the device. */
assert_se(loop_device_flock(loop, LOCK_SH) >= 0);
r = dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, DISSECT_IMAGE_READ_ONLY, &dissected);
if (r < 0)
log_error_errno(r, "Failed dissect loopback device %s: %m", loop->node);
@ -215,7 +212,7 @@ static int run(int argc, char *argv[]) {
assert_se(pclose(sfdisk) == 0);
sfdisk = NULL;
assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, &loop) >= 0);
assert_se(loop_device_make(fd, O_RDWR, 0, UINT64_MAX, LO_FLAGS_PARTSCAN, LOCK_EX, &loop) >= 0);
#if HAVE_BLKID
_cleanup_(dissected_image_unrefp) DissectedImage *dissected = NULL;
@ -223,11 +220,6 @@ static int run(int argc, char *argv[]) {
pthread_t threads[arg_n_threads];
sd_id128_t id;
/* Take an explicit lock while we format the file systems, in accordance with
* https://systemd.io/BLOCK_DEVICE_LOCKING/. We don't want udev to interfere and probe while we write
* or even issue BLKRRPART or similar while we are working on this. */
assert_se(loop_device_flock(loop, LOCK_EX) >= 0);
assert_se(dissect_image(loop->fd, NULL, NULL, loop->diskseq, loop->uevent_seqnum_not_before, loop->timestamp_not_before, 0, &dissected) >= 0);
assert_se(dissected->partitions[PARTITION_ESP].found);