From 8646b5d6e6019e95063a49fb3105833b07da5ebc Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 14 Jan 2022 16:41:28 +0000 Subject: [PATCH] shared: Copy holes in sparse files in copy_bytes_full() Previously, all holes in sparse files copied with copy_bytes_full() would be expanded in the target file. Now, we correctly detect holes in the input file and we replicate them in the target file. --- src/shared/copy.c | 41 +++++++++++++++++++++++++++++++++++++++++ src/shared/copy.h | 1 + 2 files changed, 42 insertions(+) diff --git a/src/shared/copy.c b/src/shared/copy.c index 1ace40424e5..6877b040a3a 100644 --- a/src/shared/copy.c +++ b/src/shared/copy.c @@ -202,6 +202,47 @@ int copy_bytes_full( if (max_bytes != UINT64_MAX && m > max_bytes) m = max_bytes; + if (copy_flags & COPY_HOLES) { + off_t c, e; + + c = lseek(fdf, 0, SEEK_CUR); + if (c < 0) + return -errno; + + /* To see if we're in a hole, we search for the next data offset. */ + e = lseek(fdf, c, SEEK_DATA); + if (e < 0 && errno == ENXIO) { + /* If errno == ENXIO, that means we've reached the final hole of the file and + * that hole isn't followed by more data. */ + e = lseek(fdf, 0, SEEK_END); + if (e < 0) + return -errno; + } else if (e < 0) + return -errno; + + /* If we're in a hole (current offset is not a data offset), create a hole of the same size + * in the target file. */ + if (e > c && lseek(fdt, e - c, SEEK_CUR) < 0) + return -errno; + + c = e; /* Set c to the start of the data segment. */ + + /* After copying a potential hole, find the end of the data segment by looking for the next + * hole. If we get ENXIO, we're at EOF. */ + e = lseek(fdf, c, SEEK_HOLE); + if (e < 0 && errno == ENXIO) + break; + else if (e < 0) + return -errno; + + /* SEEK_HOLE modifies the file offset so we need to move back to the initial offset. */ + if (lseek(fdf, c, SEEK_SET) < 0) + return -errno; + + /* Make sure we're not copying more than the current data segment. 
*/ + m = MIN(m, (size_t) e - c); + } + /* First try copy_file_range(), unless we already tried */ if (try_cfr) { n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u); diff --git a/src/shared/copy.h b/src/shared/copy.h index a7b45b4fbf4..d755916bd98 100644 --- a/src/shared/copy.h +++ b/src/shared/copy.h @@ -24,6 +24,7 @@ typedef enum CopyFlags { COPY_FSYNC_FULL = 1 << 11, /* fsync_full() after we are done */ COPY_SYNCFS = 1 << 12, /* syncfs() the *top-level* dir after we are done */ COPY_ALL_XATTRS = 1 << 13, /* Preserve all xattrs when copying, not just those in the user namespace */ + COPY_HOLES = 1 << 14, /* Replicate holes of sparse source files in the target instead of expanding them */ } CopyFlags; typedef int (*copy_progress_bytes_t)(uint64_t n_bytes, void *userdata);