lib/deploy: Use fallocate for early prune space check

The `f_bfree` member of the `statvfs` struct is documented as the
"number of free blocks". However, different filesystems have different
interpretations of this. E.g. on XFS, this is truly the number of blocks
free for allocating data. On ext4 however, it includes blocks that
are actually reserved by the filesystem and cannot be used for file
data. (Note this is separate from the distinction between `f_bfree` and
`f_bavail` which isn't relevant to us here since we're privileged.)

If a kernel and initrd are sized just right so that they're still within the
`f_bfree` limit but above what we can actually allocate, the early prune
code won't kick in since it'll think that there is enough space. So we
end up hitting `ENOSPC` when we actually copy the files in.

Rework the early prune code to instead use `fallocate` which guarantees
us that a file of a certain size can fit on the filesystem. `fallocate`
requires filesystem support, but all the filesystems we care about for
the bootfs support it (including even FAT).

(There's technically a TOCTOU race here that existed also with the
`statvfs` code where free space could change between when we check
and when we copy. Ideally we'd be able to pass down that fd to the
copying bits, but anyway in practice the bootfs is pretty much owned by
libostree and one doesn't expect concurrent writes during a finalization
operation.)
This commit is contained in:
Jonathan Lebon 2023-05-27 10:37:30 -04:00
parent 76649127d1
commit 193ef29f3f
2 changed files with 94 additions and 18 deletions

View File

@ -2441,6 +2441,30 @@ get_kernel_layout_size (OstreeSysroot *self, OstreeDeployment *deployment, guint
return TRUE;
}
/* This is a roundabout but more trustworthy way of doing a space check than
* relying on statvfs's f_bfree when you know the size of the objects. */
static gboolean
dfd_fallocate_check (int dfd, __off_t len, gboolean *out_passed, GError **error)
{
g_auto (GLnxTmpfile) tmpf = {
0,
};
if (!glnx_open_tmpfile_linkable_at (dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error))
return FALSE;
*out_passed = TRUE;
/* There's glnx_try_fallocate, but not with the same error semantics. */
if (TEMP_FAILURE_RETRY (fallocate (tmpf.fd, 0, 0, len)) < 0)
{
if (G_IN_SET (errno, ENOSYS, EOPNOTSUPP))
return TRUE;
else if (errno != ENOSPC)
return glnx_throw_errno_prefix (error, "fallocate");
*out_passed = FALSE;
}
return TRUE;
}
/* Analyze /boot and figure out if the new deployments won't fit in the
* remaining space. If they won't, check if deleting the deployments that are
* getting rotated out (e.g. the current rollback) would free up sufficient
@ -2553,16 +2577,17 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment
net_new_bootcsum_dirs_total_size += bootdir_size;
}
/* get bootfs free space */
struct statvfs stvfsbuf;
if (TEMP_FAILURE_RETRY (fstatvfs (self->boot_fd, &stvfsbuf)) < 0)
return glnx_throw_errno_prefix (error, "fstatvfs(boot)");
{
gboolean bootfs_has_space = FALSE;
if (!dfd_fallocate_check (self->boot_fd, net_new_bootcsum_dirs_total_size, &bootfs_has_space,
error))
return glnx_prefix_error (error, "Checking if bootfs has space");
guint64 available_size = stvfsbuf.f_bsize * stvfsbuf.f_bfree;
/* does the bootfs have enough free space for net-new bootdirs? */
if (net_new_bootcsum_dirs_total_size <= available_size)
return TRUE; /* nothing to do! */
/* does the bootfs have enough free space for temporarily holding both the new
* and old bootdirs? */
if (bootfs_has_space)
return TRUE; /* nothing to do! */
}
/* OK, we would fail if we tried to write the new bootdirs. Is it salvageable?
* First, calculate how much space we could save with the bootcsums scheduled
@ -2574,12 +2599,23 @@ auto_early_prune_old_deployments (OstreeSysroot *self, GPtrArray *new_deployment
bootcsum_dirs_to_remove_total_size += GPOINTER_TO_UINT (sizep);
}
if (net_new_bootcsum_dirs_total_size > (available_size + bootcsum_dirs_to_remove_total_size))
if (net_new_bootcsum_dirs_total_size > bootcsum_dirs_to_remove_total_size)
{
/* Even if we auto-pruned, the new bootdirs wouldn't fit. Just let the
* code continue and let it hit ENOSPC. */
g_printerr ("Disabling auto-prune optimization; insufficient space left in bootfs\n");
return TRUE;
/* Check whether if we did early prune, we'd have enough space to write
* the new bootcsum dirs. */
gboolean bootfs_has_space = FALSE;
if (!dfd_fallocate_check (
self->boot_fd, net_new_bootcsum_dirs_total_size - bootcsum_dirs_to_remove_total_size,
&bootfs_has_space, error))
return glnx_prefix_error (error, "Checking if bootfs has space");
if (!bootfs_has_space)
{
/* Even if we auto-pruned, the new bootdirs wouldn't fit. Just let the
* code continue and let it hit ENOSPC. */
g_printerr ("Disabling auto-prune optimization; insufficient space left in bootfs\n");
return TRUE;
}
}
g_printerr ("Insufficient space left in bootfs; updating bootloader in two steps\n");

View File

@ -9,9 +9,9 @@ set -xeuo pipefail
cd /root
mkdir -p rootfs/usr/lib/modules/`uname -r`
cp /usr/lib/modules/`uname -r`/vmlinuz rootfs/usr/lib/modules/`uname -r`
echo 1 >> rootfs/usr/lib/modules/`uname -r`/vmlinuz
dd if=/dev/urandom of=rootfs/usr/lib/modules/`uname -r`/vmlinuz count=1 conv=notrunc status=none
ostree commit --base "${host_refspec}" -P --tree=dir=rootfs -b modkernel1
echo 1 >> rootfs/usr/lib/modules/`uname -r`/vmlinuz
dd if=/dev/urandom of=rootfs/usr/lib/modules/`uname -r`/vmlinuz count=1 conv=notrunc status=none
ostree commit --base "${host_refspec}" -P --tree=dir=rootfs -b modkernel2
assert_bootfs_has_n_bootcsum_dirs() {
@ -25,8 +25,9 @@ assert_bootfs_has_n_bootcsum_dirs() {
}
consume_bootfs_space() {
local free_blocks=$(stat --file-system /boot -c '%a')
local block_size=$(stat --file-system /boot -c '%s')
local free_blocks block_size
free_blocks=${1:-$(stat --file-system /boot -c '%a')}
block_size=$(stat --file-system /boot -c '%s')
# leave 1 block free
unshare -m bash -c \
"mount -o rw,remount /boot && \
@ -92,4 +93,43 @@ rm out.txt
assert_bootfs_has_n_bootcsum_dirs 2
assert_not_streq "$bootloader_orig" "$(sha256sum /boot/loader/entries/*)"
# This next test relies on the fact that FCOS currently uses ext4 for /boot.
# If that ever changes, we can reprovision boot to be ext4.
if [[ $(findmnt -no FSTYPE /boot) != ext4 ]]; then
assert_not_reached "/boot is not ext4"
fi
# Put modkernel2 in rollback position
rpm-ostree rollback
# Below, we test that a bootcsum dir sized below f_bfree but still large enough
# to not actually fit (because some filesystems like ext4 include reserved
# overhead in their f_bfree count for some reason) will still trigger the auto-
# prune logic.
unconsume_bootfs_space
# Size the bigfile just right so that the kernel+initrd will be just at the max
# limit according to f_bfree.
unshare -m bash -c \
"mount -o rw,remount /boot && \
cp /usr/lib/modules/`uname -r`/{vmlinuz,initramfs.img} /boot"
free_blocks=$(stat --file-system /boot -c '%f')
unshare -m bash -c \
"mount -o rw,remount /boot && rm /boot/{vmlinuz,initramfs.img}"
consume_bootfs_space "$((free_blocks))"
rpm-ostree rebase :modkernel1
if ostree admin finalize-staged |& tee out.txt; then
assert_not_reached "successfully wrote kernel without auto-pruning"
fi
assert_file_has_content out.txt "No space left on device"
rm out.txt
# now, try again but with auto-pruning enabled
rpm-ostree rebase :modkernel1
OSTREE_SYSROOT_OPTS=early-prune ostree admin finalize-staged |& tee out.txt
assert_file_has_content out.txt "updating bootloader in two steps"
rm out.txt
echo "ok bootfs auto-prune"