From 8a050905a30ff3a5108a8591130e27a1cb17b90c Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 23 Nov 2022 10:26:33 +0100 Subject: [PATCH 1/3] repart: Move comment --- src/partition/repart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/partition/repart.c b/src/partition/repart.c index f6b8c9be65f..04cf66ec139 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -3721,6 +3721,10 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { if (pfd < 0) return log_error_errno(pfd, "Failed to open parent directory of target: %m"); + /* Make sure everything is owned by the user running repart so that + * make_filesystem() can map the user running repart to "root" in a user + * namespace to have the files owned by root in the final image. */ + r = copy_tree_at( sfd, ".", pfd, fn, @@ -3813,10 +3817,6 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char if (fchmod(rfd, 0755) < 0) return log_error_errno(errno, "Failed to change mode of temporary directory: %m"); - /* Make sure everything is owned by the user running repart so that make_filesystem() can map the - * user running repart to "root" in a user namespace to have the files owned by root in the final - * image. */ - r = do_copy_files(p, root, denylist); if (r < 0) return r; From d2ac7698cb43807a2dd0af727599db486180ebf1 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 23 Nov 2022 12:07:27 +0100 Subject: [PATCH 2/3] repart: Make sure all files in the image are owned by root --- src/partition/repart.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/partition/repart.c b/src/partition/repart.c index 04cf66ec139..20772020ef7 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -3678,7 +3678,12 @@ static int context_copy_blocks(Context *context) { return 0; } -static int do_copy_files(Partition *p, const char *root, const Set *denylist) { +static int do_copy_files( + Partition *p, + const char *root, + uid_t override_uid, + gid_t override_gid, + const Set *denylist) { int r; @@ -3728,14 +3733,14 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { r = copy_tree_at( sfd, ".", pfd, fn, - getuid(), getgid(), + override_uid, override_gid, COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, denylist); } else r = copy_tree_at( sfd, ".", tfd, ".", - getuid(), getgid(), + override_uid, override_gid, COPY_REFLINK|COPY_HOLES|COPY_MERGE|COPY_REPLACE|COPY_SIGINT|COPY_HARDLINKS|COPY_ALL_XATTRS, denylist); if (r < 0) @@ -3773,6 +3778,9 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { if (r < 0) return log_error_errno(r, "Failed to copy '%s' to '%s%s': %m", *source, strempty(arg_root), *target); + if (fchown(tfd, override_uid, override_gid) < 0) + return log_error_errno(r, "Failed to change ownership of %s", *target); + (void) copy_xattr(sfd, tfd, COPY_ALL_XATTRS); (void) copy_access(sfd, tfd); (void) copy_times(sfd, tfd, 0); @@ -3782,7 +3790,7 @@ static int do_copy_files(Partition *p, const char *root, const Set *denylist) { return 0; } -static int do_make_directories(Partition *p, const char *root) { +static int do_make_directories(Partition *p, uid_t override_uid, gid_t override_gid, const char *root) { int r; assert(p); @@ -3790,7 +3798,7 @@ static int do_make_directories(Partition *p, const char *root) { STRV_FOREACH(d, p->make_directories) { - r = mkdir_p_root(root, *d, getuid(), getgid(), 0755); + r = mkdir_p_root(root, *d, override_uid, override_gid, 0755); if (r < 0) return log_error_errno(r, "Failed to create directory '%s' in file system: %m", *d); } @@ -3817,11 +3825,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char if (fchmod(rfd, 0755) < 0) return log_error_errno(errno, "Failed to change mode of temporary directory: %m"); - r = do_copy_files(p, root, denylist); + r = do_copy_files(p, root, getuid(), getgid(), denylist); if (r < 0) return r; - r = do_make_directories(p, root); + r = do_make_directories(p, getuid(), getgid(), root); if (r < 0) return r; @@ -3873,10 +3881,10 @@ static int partition_populate_filesystem(Partition *p, const char *node, const S if (mount_nofollow_verbose(LOG_ERR, node, fs, p->format, MS_NOATIME|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0) _exit(EXIT_FAILURE); - if (do_copy_files(p, fs, denylist) < 0) + if (do_copy_files(p, fs, 0, 0, denylist) < 0) _exit(EXIT_FAILURE); - if (do_make_directories(p, fs) < 0) + if (do_make_directories(p, 0, 0, fs) < 0) _exit(EXIT_FAILURE); r = syncfs_path(AT_FDCWD, fs); From d3201eb4f4680f244ac422b5557f14f34732b135 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Wed, 23 Nov 2022 12:00:01 +0100 Subject: [PATCH 3/3] repart: Prefer using loop devices to populate filesystems when available Let's make sure we use loop devices if we have access to them and only fall back to regular files if we can't use loop devices. We prefer loop devices because when using mkfs --root options, we have to populate a temporary staging tree which means we're copying every file twice instead of once when using loop devices. --- src/partition/repart.c | 192 +++++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 84 deletions(-) diff --git a/src/partition/repart.c b/src/partition/repart.c index 20772020ef7..7857f7b3d19 100644 --- a/src/partition/repart.c +++ b/src/partition/repart.c @@ -3037,6 +3037,36 @@ static PartitionTarget *partition_target_free(PartitionTarget *t) { DEFINE_TRIVIAL_CLEANUP_FUNC(PartitionTarget*, partition_target_free); +static int prepare_temporary_file(PartitionTarget *t, uint64_t size) { + _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_close_ int fd = -1; + const char *vt; + int r; + + assert(t); + + r = var_tmp_dir(&vt); + if (r < 0) + return log_error_errno(r, "Could not determine temporary directory: %m"); + + temp = path_join(vt, "repart-XXXXXX"); + if (!temp) + return log_oom(); + + fd = mkostemp_safe(temp); + if (fd < 0) + return log_error_errno(fd, "Failed to create temporary file: %m"); + + if (ftruncate(fd, size) < 0) + return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", + FORMAT_BYTES(size)); + + t->fd = TAKE_FD(fd); + t->path = TAKE_PTR(temp); + + return 0; +} + static int partition_target_prepare( Context *context, Partition *p, @@ -3045,9 +3075,8 @@ static int partition_target_prepare( PartitionTarget **ret) { _cleanup_(partition_target_freep) PartitionTarget *t = NULL; - struct stat st; - int whole_fd; - int r; + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; + int whole_fd, r; assert(context); assert(p); @@ -3055,16 +3084,6 @@ static int partition_target_prepare( assert_se((whole_fd = fdisk_get_devfd(context->fdisk_context)) >= 0); - if (fstat(whole_fd, &st) < 0) - return -errno; - - /* If we're operating on a block device, we definitely need privileges to access block devices so we - * can just use loop devices as our target. Otherwise, we're operating on a regular file, in that - * case, let's write to regular files and copy those into the final image so we can run without root - * privileges. On filesystems with reflinking support, we can take advantage of this and just reflink - * the result into the image. - */ - t = new(PartitionTarget, 1); if (!t) return log_oom(); @@ -3073,47 +3092,38 @@ static int partition_target_prepare( .whole_fd = -1, }; - if (S_ISBLK(st.st_mode) || (p->format && !mkfs_supports_root_option(p->format))) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - - /* Loopback block devices are not only useful to turn regular files into block devices, but - * also to cut out sections of block devices into new block devices. */ - - r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d); - if (r < 0) - return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); - - t->loop = TAKE_PTR(d); - } else if (need_path) { - _cleanup_(unlink_and_freep) char *temp = NULL; - _cleanup_close_ int fd = -1; - const char *vt; - - r = var_tmp_dir(&vt); - if (r < 0) - return log_error_errno(r, "Could not determine temporary directory: %m"); - - temp = path_join(vt, "repart-XXXXXX"); - if (!temp) - return log_oom(); - - fd = mkostemp_safe(temp); - if (fd < 0) - return log_error_errno(fd, "Failed to create temporary file: %m"); - - if (ftruncate(fd, size) < 0) - return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", - FORMAT_BYTES(size)); - - t->fd = TAKE_FD(fd); - t->path = TAKE_PTR(temp); - } else { + if (!need_path) { if (lseek(whole_fd, p->offset, SEEK_SET) == (off_t) -1) return log_error_errno(errno, "Failed to seek to partition offset: %m"); t->whole_fd = whole_fd; + *ret = TAKE_PTR(t); + return 0; } + /* Loopback block devices are not only useful to turn regular files into block devices, but + * also to cut out sections of block devices into new block devices. */ + + r = loop_device_make(whole_fd, O_RDWR, p->offset, size, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of future partition %" PRIu64 ": %m", p->partno); + if (r >= 0) { + t->loop = TAKE_PTR(d); + *ret = TAKE_PTR(t); + return 0; + } + + /* If we can't allocate a loop device, let's write to a regular file that we copy into the final + * image so we can run in containers and without needing root privileges. On filesystems with + * reflinking support, we can take advantage of this and just reflink the result into the image. + */ + + log_debug_errno(r, "No access to loop devices, falling back to a regular file"); + + r = prepare_temporary_file(t, size); + if (r < 0) + return r; + *ret = TAKE_PTR(t); return 0; @@ -3806,6 +3816,11 @@ static int do_make_directories(Partition *p, uid_t override_uid, gid_t override_ return 0; } +static bool partition_needs_populate(Partition *p) { + assert(p); + return !strv_isempty(p->copy_files) || !strv_isempty(p->make_directories); +} + static int partition_populate_directory(Partition *p, const Set *denylist, char **ret) { _cleanup_(rm_rf_physical_and_freep) char *root = NULL; _cleanup_close_ int rfd = -1; @@ -3813,11 +3828,6 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char assert(ret); - if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) { - *ret = NULL; - return 0; - } - rfd = mkdtemp_open("/var/tmp/repart-XXXXXX", 0, &root); if (rfd < 0) return log_error_errno(rfd, "Failed to create temporary directory: %m"); @@ -3838,27 +3848,11 @@ static int partition_populate_directory(Partition *p, const Set *denylist, char } static int partition_populate_filesystem(Partition *p, const char *node, const Set *denylist) { - _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; - struct stat st; int r; assert(p); assert(node); - if (strv_isempty(p->copy_files) && strv_isempty(p->make_directories)) - return 0; - - if (stat(node, &st) < 0) - return log_error_errno(errno, "Failed to stat %s: %m", node); - - if (!S_ISBLK(st.st_mode)) { - r = loop_device_make_by_path(node, O_RDWR, 0, LOCK_EX, &d); - if (r < 0) - return log_error_errno(r, "Failed to make loopback device of %s: %m", node); - - node = d->node; - } - log_info("Populating %s filesystem with files.", p->format); /* We copy in a child process, since we have to mount the fs for that, and we don't want that fs to @@ -3987,11 +3981,16 @@ static int context_mkfs(Context *context) { log_info("Formatting future partition %" PRIu64 ".", p->partno); - /* We prefer (or are required in the case of read-only filesystems) to populate filesystems - * directly via the corresponding mkfs binary if it supports a --rootdir (or equivalent) - * option. To do that, we need to setup the final directory tree beforehand. */ + /* If we're not writing to a loop device or if we're populating a read-only filesystem, we + * have to populate using the filesystem's mkfs's --root (or equivalent) option. To do that, + * we need to set up the final directory tree beforehand. */ + + if (partition_needs_populate(p) && (!t->loop || fstype_is_ro(p->format))) { + if (!mkfs_supports_root_option(p->format)) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), + "Loop device access is required to populate %s filesystems.", + p->format); - if (mkfs_supports_root_option(p->format)) { r = partition_populate_directory(p, denylist, &root); if (r < 0) return r; @@ -4004,9 +4003,11 @@ static int context_mkfs(Context *context) { log_info("Successfully formatted future partition %" PRIu64 ".", p->partno); - /* Now, we can populate all the other filesystems that we couldn't populate earlier. */ - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, partition_target_path(t), denylist); + /* If we're writing to a loop device, we can now mount the empty filesystem and populate it. */ + if (partition_needs_populate(p) && !root) { + assert(t->loop); + + r = partition_populate_filesystem(p, t->loop->node, denylist); if (r < 0) return r; } @@ -5251,6 +5252,7 @@ static int context_minimize(Context *context) { LIST_FOREACH(partitions, p, context->partitions) { _cleanup_(rm_rf_physical_and_freep) char *root = NULL; _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_(loop_device_unrefp) LoopDevice *d = NULL; _cleanup_close_ int fd = -1; sd_id128_t fs_uuid; uint64_t fsz; @@ -5267,6 +5269,9 @@ static int context_minimize(Context *context) { if (!p->minimize) continue; + if (!partition_needs_populate(p)) + continue; + assert(!p->copy_blocks_path); r = tempfn_random_child(vt, "repart", &temp); @@ -5281,11 +5286,15 @@ static int context_minimize(Context *context) { return log_error_errno(errno, "Failed to open temporary file %s: %m", temp); /* This may seem huge but it will be created sparse so it doesn't take up any space - * on disk until written to. */ + * on disk until written to. */ if (ftruncate(fd, 1024ULL * 1024ULL * 1024ULL * 1024ULL) < 0) return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(1024ULL * 1024ULL * 1024ULL * 1024ULL)); + r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of %s: %m", temp); + /* We're going to populate this filesystem twice so use a random UUID the first time * to avoid UUID conflicts. */ r = sd_id128_randomize(&fs_uuid); @@ -5293,13 +5302,18 @@ static int context_minimize(Context *context) { return r; } - if (mkfs_supports_root_option(p->format)) { + if (!d || fstype_is_ro(p->format)) { + if (!mkfs_supports_root_option(p->format)) + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), + "Loop device access is required to populate %s filesystems", + p->format); + r = partition_populate_directory(p, denylist, &root); if (r < 0) return r; } - r = make_filesystem(temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard); + r = make_filesystem(d ? d->node : temp, p->format, strempty(p->new_label), root, fs_uuid, arg_discard); if (r < 0) return r; @@ -5310,8 +5324,10 @@ static int context_minimize(Context *context) { continue; } - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, temp, denylist); + if (!root) { + assert(d); + + r = partition_populate_filesystem(p, d->node, denylist); if (r < 0) return r; } @@ -5336,6 +5352,8 @@ static int context_minimize(Context *context) { if (minimal_size_by_fs_name(p->format) != UINT64_MAX) fsz = MAX(minimal_size_by_fs_name(p->format), fsz); + d = loop_device_unref(d); + /* Erase the previous filesystem first. */ if (ftruncate(fd, 0)) return log_error_errno(errno, "Failed to erase temporary file: %m"); @@ -5343,12 +5361,18 @@ static int context_minimize(Context *context) { if (ftruncate(fd, fsz)) return log_error_errno(errno, "Failed to truncate temporary file to %s: %m", FORMAT_BYTES(fsz)); - r = make_filesystem(temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard); + r = loop_device_make(fd, O_RDWR, 0, UINT64_MAX, 0, 0, LOCK_EX, &d); + if (r < 0 && r != -ENOENT && !ERRNO_IS_PRIVILEGE(r)) + return log_error_errno(r, "Failed to make loopback device of %s: %m", temp); + + r = make_filesystem(d ? d->node : temp, p->format, strempty(p->new_label), root, p->fs_uuid, arg_discard); if (r < 0) return r; - if (!mkfs_supports_root_option(p->format)) { - r = partition_populate_filesystem(p, temp, denylist); + if (!root) { + assert(d); + + r = partition_populate_filesystem(p, d->node, denylist); if (r < 0) return r; }