From 8d6a4d33e1c38864b064df39473f4e74ef949b36 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 17:35:36 +0100 Subject: [PATCH 01/11] journal-file: refuse opening non-regular journal files Let's check the file node type when we open/stat journal files: refuse anything that is not a regular file... --- src/journal/journal-file.c | 6 ++++++ src/journal/sd-journal.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 3353b3a0d81..09c511b8b83 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -651,6 +651,12 @@ static int journal_file_fstat(JournalFile *f) { f->last_stat_usec = now(CLOCK_MONOTONIC); + /* Refuse dealing with with files that aren't regular */ + if (S_ISDIR(f->last_stat.st_mode)) + return -EISDIR; + if (!S_ISREG(f->last_stat.st_mode)) + return -EBADFD; + /* Refuse appending to files that are already deleted */ if (f->last_stat.st_nlink <= 0) return -EIDRM; diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 4deee461c3f..8a591205c4a 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -2016,6 +2016,10 @@ _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fd goto fail; } + if (S_ISDIR(st.st_mode)) { + r = -EISDIR; + goto fail; + } if (!S_ISREG(st.st_mode)) { r = -EBADFD; goto fail; From 817b1c5b1e9af0a2cac9b895f1880ff61812f8cf Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 17:37:47 +0100 Subject: [PATCH 02/11] journal-file: add O_NONBLOCK for paranoia when opening journal files --- src/journal/journal-file.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 09c511b8b83..1640a8baf09 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -3298,6 +3298,8 @@ int journal_file_open( goto fail; } } else { + assert(fd >= 0); + /* If we don't know the path, fill in something explanatory and vaguely useful */ if (asprintf(&f->path, "/proc/self/%i", fd) < 0) { r = -ENOMEM; @@ -3312,7 +3314,11 @@ int journal_file_open( } if (f->fd < 0) { - f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode); + /* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO + * or so, we likely fail quickly than block for long. For regular files O_NONBLOCK has no effect, hence + * it doesn't hurt in that case. */ + + f->fd = open(f->path, f->flags|O_CLOEXEC|O_NONBLOCK, f->mode); if (f->fd < 0) { r = -errno; goto fail; @@ -3320,6 +3326,10 @@ int journal_file_open( /* fds we opened here by us should also be closed by us. */ f->close_fd = true; + + r = fd_nonblock(f->fd, false); + if (r < 0) + goto fail; } f->cache_fd = mmap_cache_add_fd(f->mmap, f->fd); From fc1813c0fe1f293909dbf4c6e67db0b86f476b8d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 17:40:01 +0100 Subject: [PATCH 03/11] =?UTF-8?q?sd-journal:=20rename=20add=5Ffile()=20?= =?UTF-8?q?=E2=86=92=20add=5Ffile=5Fby=5Fname()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's be more careful with the naming, and indicate that the function is about *named* journal files, and will validate the name as needed. (in opposition to add_any_file() which doesn't care about names) --- src/journal/sd-journal.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 8a591205c4a..3062367385b 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1333,7 +1333,11 @@ fail: return r; } -static int add_file(sd_journal *j, const char *prefix, const char *filename) { +static int add_file_by_name( + sd_journal *j, + const char *prefix, + const char *filename) { + const char *path; assert(j); @@ -1350,7 +1354,11 @@ static int add_file(sd_journal *j, const char *prefix, const char *filename) { return add_any_file(j, -1, path); } -static void remove_file(sd_journal *j, const char *prefix, const char *filename) { +static void remove_file_by_name( + sd_journal *j, + const char *prefix, + const char *filename) { + const char *path; JournalFile *f; @@ -1370,7 +1378,7 @@ static void remove_file_real(sd_journal *j, JournalFile *f) { assert(j); assert(f); - ordered_hashmap_remove(j->files, f->path); + (void) ordered_hashmap_remove(j->files, f->path); log_debug("File %s removed.", f->path); @@ -1463,8 +1471,9 @@ static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) { assert(d); FOREACH_DIRENT_ALL(de, d, goto fail) { + if (dirent_is_journal_file(de)) - (void) add_file(j, m->path, de->d_name); + (void) add_file_by_name(j, m->path, de->d_name); if (m->is_root && dirent_is_id128_subdir(de)) (void) add_directory(j, m->path, de->d_name); @@ -2499,9 +2508,9 @@ static void process_inotify_event(sd_journal *j, struct inotify_event *e) { /* Event for a journal file */ if (e->mask & (IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB)) - (void) add_file(j, d->path, e->name); + (void) add_file_by_name(j, d->path, e->name); else if (e->mask & (IN_DELETE|IN_MOVED_FROM|IN_UNMOUNT)) - remove_file(j, d->path, e->name); + remove_file_by_name(j, d->path, e->name); } else if (!d->is_root && e->len == 0) { From 9c66f528138f4fc856b3e9e137245b7048d5747d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 17:42:47 +0100 Subject: [PATCH 04/11] sd-journal: when picking up a new file, compare inode/device info with previous open file by same name Let's make sure we aren't confused if a journal file is replaced by a different one (for example due to rotation) if we are in a q overflow: let's compare the inode/device information, and if it changed replace any open file object as needed. Fixes: #8198 --- src/journal/sd-journal.c | 133 ++++++++++++++++++++++++++------------- 1 file changed, 91 insertions(+), 42 deletions(-) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 3062367385b..7e1fda85964 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1243,6 +1243,16 @@ static bool path_has_prefix(sd_journal *j, const char *path, const char *prefix) return path_startswith(path, prefix); } +static void track_file_disposition(sd_journal *j, JournalFile *f) { + assert(j); + assert(f); + + if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run")) + j->has_runtime_files = true; + else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var")) + j->has_persistent_files = true; +} + static const char *skip_slash(const char *p) { if (!p) @@ -1254,81 +1264,120 @@ static const char *skip_slash(const char *p) { return p; } -static int add_any_file(sd_journal *j, int fd, const char *path) { - JournalFile *f = NULL; +static int add_any_file( + sd_journal *j, + int fd, + const char *path) { + bool close_fd = false; + JournalFile *f; + struct stat st; int r, k; assert(j); assert(fd >= 0 || path); - if (path) { - f = ordered_hashmap_get(j->files, path); - if (f) { - /* Mark this file as seen in this generation. This is used to GC old files in - * process_q_overflow() to detect journal files that are still and discern them from those who - * are gone. */ - f->last_seen_generation = j->generation; - return 0; + if (fd < 0) { + if (j->toplevel_fd >= 0) + /* If there's a top-level fd defined make the path relative, explicitly, since otherwise + * openat() ignores the first argument. */ + + fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC|O_NONBLOCK); + else + fd = open(path, O_RDONLY|O_CLOEXEC|O_NONBLOCK); + if (fd < 0) { + r = log_debug_errno(errno, "Failed to open journal file %s: %m", path); + goto finish; } + + close_fd = true; + + r = fd_nonblock(fd, false); + if (r < 0) { + r = log_debug_errno(errno, "Failed to turn off O_NONBLOCK for %s: %m", path); + goto finish; + } + } + + if (fstat(fd, &st) < 0) { + r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path); + goto finish; + } + if (S_ISDIR(st.st_mode)) { + log_debug("Uh, file '%s' is a directory? Refusing.", path); + r = -EISDIR; + goto finish; + } + if (!S_ISREG(st.st_mode)) { + log_debug("Uh, file '%s' is not a regular file? Refusing.", path); + r = -EBADFD; + goto finish; + } + + f = ordered_hashmap_get(j->files, path); + if (f) { + if (f->last_stat.st_dev == st.st_dev && + f->last_stat.st_ino == st.st_ino) { + + /* We already track this file, under the same path and with the same device/inode numbers, it's + * hence really the same. Mark this file as seen in this generation. This is used to GC old + * files in process_q_overflow() to detect journal files that are still there and discern them + * from those which are gone. */ + + f->last_seen_generation = j->generation; + r = 0; + goto finish; + } + + /* So we tracked a file under this name, but it has a different inode/device. In that case, it got + * replaced (probably due to rotation?), let's drop it hence from our list. */ + remove_file_real(j, f); + f = NULL; } if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) { log_debug("Too many open journal files, not adding %s.", path); r = -ETOOMANYREFS; - goto fail; - } - - if (fd < 0 && j->toplevel_fd >= 0) { - - /* If there's a top-level fd defined, open the file relative to this now. (Make the path relative, - * explicitly, since otherwise openat() ignores the first argument.) */ - - fd = openat(j->toplevel_fd, skip_slash(path), O_RDONLY|O_CLOEXEC); - if (fd < 0) { - r = log_debug_errno(errno, "Failed to open journal file %s: %m", path); - goto fail; - } - - close_fd = true; + goto finish; } r = journal_file_open(fd, path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, NULL, &f); if (r < 0) { - if (close_fd) - safe_close(fd); log_debug_errno(r, "Failed to open journal file %s: %m", path); - goto fail; + goto finish; } /* journal_file_dump(f); */ r = ordered_hashmap_put(j->files, f->path, f); if (r < 0) { - f->close_fd = close_fd; + f->close_fd = false; /* make sure journal_file_close() doesn't close the caller's fd (or our own). We'll let the caller do that, or ourselves */ (void) journal_file_close(f); - goto fail; + goto finish; } + close_fd = false; /* the fd is now owned by the JournalFile object */ + f->last_seen_generation = j->generation; - if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run")) - j->has_runtime_files = true; - else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var")) - j->has_persistent_files = true; - - log_debug("File %s added.", f->path); - + track_file_disposition(j, f); check_network(j, f->fd); j->current_invalidate_counter++; - return 0; + log_debug("File %s added.", f->path); -fail: - k = journal_put_error(j, r, path); - if (k < 0) - return k; + r = 0; + +finish: + if (close_fd) + safe_close(fd); + + if (r < 0) { + k = journal_put_error(j, r, path); + if (k < 0) + return k; + } return r; } From 3cc44114038621237a9d8ee4be3ff836138a55c1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 18:01:05 +0100 Subject: [PATCH 05/11] stat-util: unify code that checks whether something is a regular file Let's add a common implementation for regular file checks, that are careful to return the right error code (EISDIR/EISLNK/EBADFD) when we are encountering a wrong file node. --- src/basic/btrfs-util.c | 37 +++++++++++++------------------------ src/basic/stat-util.c | 29 +++++++++++++++++++++++++++++ src/basic/stat-util.h | 3 +++ src/import/export-raw.c | 6 ++++-- src/journal/journal-file.c | 10 ++++++---- src/journal/sd-journal.c | 21 ++++++--------------- src/shared/install.c | 17 +++++++---------- src/shared/loop-util.c | 8 +++++--- 8 files changed, 73 insertions(+), 58 deletions(-) diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c index 19d385ab7c5..3d30497f74a 100644 --- a/src/basic/btrfs-util.c +++ b/src/basic/btrfs-util.c @@ -232,23 +232,18 @@ int btrfs_subvol_get_read_only_fd(int fd) { } int btrfs_reflink(int infd, int outfd) { - struct stat st; int r; assert(infd >= 0); assert(outfd >= 0); - /* Make sure we invoke the ioctl on a regular file, so that no - * device driver accidentally gets it. */ + /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */ - if (fstat(outfd, &st) < 0) - return -errno; - - if (!S_ISREG(st.st_mode)) - return -EINVAL; - - r = ioctl(outfd, BTRFS_IOC_CLONE, infd); + r = fd_verify_regular(outfd); if (r < 0) + return r; + + if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0) return -errno; return 0; @@ -261,21 +256,17 @@ int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offs .src_length = sz, .dest_offset = out_offset, }; - struct stat st; int r; assert(infd >= 0); assert(outfd >= 0); assert(sz > 0); - if (fstat(outfd, &st) < 0) - return -errno; - - if (!S_ISREG(st.st_mode)) - return -EINVAL; - - r = ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args); + r = fd_verify_regular(outfd); if (r < 0) + return r; + + if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0) return -errno; return 0; @@ -760,15 +751,13 @@ int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQu } int btrfs_defrag_fd(int fd) { - struct stat st; + int r; assert(fd >= 0); - if (fstat(fd, &st) < 0) - return -errno; - - if (!S_ISREG(st.st_mode)) - return -EINVAL; + r = fd_verify_regular(fd); + if (r < 0) + return r; if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0) return -errno; diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c index 0fb6750a075..3689f6e9833 100644 --- a/src/basic/stat-util.c +++ b/src/basic/stat-util.c @@ -269,3 +269,32 @@ int path_is_temporary_fs(const char *path) { return fd_is_temporary_fs(fd); } + +int stat_verify_regular(const struct stat *st) { + assert(st); + + /* Checks whether the specified stat() structure refers to a regular file. If not returns an appropriate error + * code. */ + + if (S_ISDIR(st->st_mode)) + return -EISDIR; + + if (S_ISLNK(st->st_mode)) + return -ELOOP; + + if (!S_ISREG(st->st_mode)) + return -EBADFD; + + return 0; +} + +int fd_verify_regular(int fd) { + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + return stat_verify_regular(&st); +} diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h index da33e68db25..4b941f61042 100644 --- a/src/basic/stat-util.h +++ b/src/basic/stat-util.h @@ -75,3 +75,6 @@ int path_is_temporary_fs(const char *path); * signed/unsigned comparison, because the magic can be 32 bit unsigned. */ #define F_TYPE_EQUAL(a, b) (a == (typeof(a)) b) + +int stat_verify_regular(const struct stat *st); +int fd_verify_regular(int fd); diff --git a/src/import/export-raw.c b/src/import/export-raw.c index 8485027b2b5..eaa6d109157 100644 --- a/src/import/export-raw.c +++ b/src/import/export-raw.c @@ -37,6 +37,7 @@ #include "import-common.h" #include "missing.h" #include "ratelimit.h" +#include "stat-util.h" #include "string-util.h" #include "util.h" @@ -319,8 +320,9 @@ int raw_export_start(RawExport *e, const char *path, int fd, ImportCompressType if (fstat(sfd, &e->st) < 0) return -errno; - if (!S_ISREG(e->st.st_mode)) - return -ENOTTY; + r = stat_verify_regular(&e->st); + if (r < 0) + return r; /* Try to take a reflink snapshot of the file, if we can t make the export atomic */ tfd = reflink_snapshot(sfd, path); diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 1640a8baf09..96e45e92f5c 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -42,6 +42,7 @@ #include "random-util.h" #include "sd-event.h" #include "set.h" +#include "stat-util.h" #include "string-util.h" #include "strv.h" #include "xattr-util.h" @@ -643,6 +644,8 @@ static int journal_file_verify_header(JournalFile *f) { } static int journal_file_fstat(JournalFile *f) { + int r; + assert(f); assert(f->fd >= 0); @@ -652,10 +655,9 @@ static int journal_file_fstat(JournalFile *f) { f->last_stat_usec = now(CLOCK_MONOTONIC); /* Refuse dealing with with files that aren't regular */ - if (S_ISDIR(f->last_stat.st_mode)) - return -EISDIR; - if (!S_ISREG(f->last_stat.st_mode)) - return -EBADFD; + r = stat_verify_regular(&f->last_stat); + if (r < 0) + return r; /* Refuse appending to files that are already deleted */ if (f->last_stat.st_nlink <= 0) diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 7e1fda85964..11dbd83f2de 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -1303,14 +1303,10 @@ static int add_any_file( r = log_debug_errno(errno, "Failed to fstat file '%s': %m", path); goto finish; } - if (S_ISDIR(st.st_mode)) { - log_debug("Uh, file '%s' is a directory? Refusing.", path); - r = -EISDIR; - goto finish; - } - if (!S_ISREG(st.st_mode)) { - log_debug("Uh, file '%s' is not a regular file? Refusing.", path); - r = -EBADFD; + + r = stat_verify_regular(&st); + if (r < 0) { + log_debug_errno(r, "Refusing to open '%s', as it is not a regular file.", path); goto finish; } @@ -2074,14 +2070,9 @@ _public_ int sd_journal_open_files_fd(sd_journal **ret, int fds[], unsigned n_fd goto fail; } - if (S_ISDIR(st.st_mode)) { - r = -EISDIR; + r = stat_verify_regular(&st); + if (r < 0) goto fail; - } - if (!S_ISREG(st.st_mode)) { - r = -EBADFD; - goto fail; - } r = add_any_file(j, fds[i], NULL); if (r < 0) diff --git a/src/shared/install.c b/src/shared/install.c index fdce447c89e..01e2ebf672a 100644 --- a/src/shared/install.c +++ b/src/shared/install.c @@ -1284,10 +1284,10 @@ static int unit_file_load( info->type = UNIT_FILE_TYPE_MASKED; return 0; } - if (S_ISDIR(st.st_mode)) - return -EISDIR; - if (!S_ISREG(st.st_mode)) - return -ENOTTY; + + r = stat_verify_regular(&st); + if (r < 0) + return r; f = fdopen(fd, "re"); if (!f) @@ -2163,12 +2163,9 @@ int unit_file_link( if (lstat(full, &st) < 0) return -errno; - if (S_ISLNK(st.st_mode)) - return -ELOOP; - if (S_ISDIR(st.st_mode)) - return -EISDIR; - if (!S_ISREG(st.st_mode)) - return -ENOTTY; + r = stat_verify_regular(&st); + if (r < 0) + return r; q = in_search_path(&paths, *i); if (q < 0) diff --git a/src/shared/loop-util.c b/src/shared/loop-util.c index 37b8479f881..0f3defd581a 100644 --- a/src/shared/loop-util.c +++ b/src/shared/loop-util.c @@ -27,6 +27,7 @@ #include "alloc-util.h" #include "fd-util.h" #include "loop-util.h" +#include "stat-util.h" int loop_device_make(int fd, int open_flags, LoopDevice **ret) { const struct loop_info64 info = { @@ -37,7 +38,7 @@ int loop_device_make(int fd, int open_flags, LoopDevice **ret) { _cleanup_free_ char *loopdev = NULL; struct stat st; LoopDevice *d; - int nr; + int nr, r; assert(fd >= 0); assert(ret); @@ -69,8 +70,9 @@ int loop_device_make(int fd, int open_flags, LoopDevice **ret) { return 0; } - if (!S_ISREG(st.st_mode)) - return -EINVAL; + r = stat_verify_regular(&st); + if (r < 0) + return r; control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK); if (control < 0) From 11b29a96e98054ed69a2314ed71a286e852fa24e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 18:23:38 +0100 Subject: [PATCH 06/11] fs-util: move fsync_directory_of_file() into generic code This function used by the journal code is pretty useful generically, let's move it to fs-util.c to make it useful for other code too. --- src/basic/fs-util.c | 30 ++++++++++++++++++++++++++++++ src/basic/fs-util.h | 2 ++ src/journal/journal-file.c | 34 +--------------------------------- src/test/test-fs-util.c | 10 ++++++++++ 4 files changed, 43 insertions(+), 33 deletions(-) diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index 47edcbb04f1..85c8070a1b2 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -967,3 +967,33 @@ int unlinkat_deallocate(int fd, const char *name, int flags) { return 0; } + +int fsync_directory_of_file(int fd) { + _cleanup_free_ char *path = NULL, *dn = NULL; + _cleanup_close_ int dfd = -1; + int r; + + r = fd_verify_regular(fd); + if (r < 0) + return r; + + r = fd_get_path(fd, &path); + if (r < 0) + return r; + + if (!path_is_absolute(path)) + return -EINVAL; + + dn = dirname_malloc(path); + if (!dn) + return -ENOMEM; + + dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (dfd < 0) + return -errno; + + if (fsync(dfd) < 0) + return -errno; + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index acb83dfd83a..82d7e765b3e 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -107,3 +107,5 @@ DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); int access_fd(int fd, int mode); int unlinkat_deallocate(int fd, const char *name, int flags); + +int fsync_directory_of_file(int fd); diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 96e45e92f5c..98201236603 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -33,6 +33,7 @@ #include "chattr-util.h" #include "compress.h" #include "fd-util.h" +#include "fs-util.h" #include "journal-authenticate.h" #include "journal-def.h" #include "journal-file.h" @@ -454,39 +455,6 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { return 0; } -static int fsync_directory_of_file(int fd) { - _cleanup_free_ char *path = NULL, *dn = NULL; - _cleanup_close_ int dfd = -1; - struct stat st; - int r; - - if (fstat(fd, &st) < 0) - return -errno; - - if (!S_ISREG(st.st_mode)) - return -EBADFD; - - r = fd_get_path(fd, &path); - if (r < 0) - return r; - - if (!path_is_absolute(path)) - return -EINVAL; - - dn = dirname_malloc(path); - if (!dn) - return -ENOMEM; - - dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY); - if (dfd < 0) - return -errno; - - if (fsync(dfd) < 0) - return -errno; - - return 0; -} - static int journal_file_refresh_header(JournalFile *f) { sd_id128_t boot_id; int r; diff --git a/src/test/test-fs-util.c b/src/test/test-fs-util.c index 184a2a52c2c..ebcec4fcc55 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -552,6 +552,15 @@ static void test_unlinkat_deallocate(void) { assert_se(st.st_nlink == 0); } +static void test_fsync_directory_of_file(void) { + _cleanup_close_ int fd = -1; + + fd = open_tmpfile_unlinkable(NULL, O_RDWR); + assert_se(fd >= 0); + + assert_se(fsync_directory_of_file(fd) >= 0); +} + int main(int argc, char *argv[]) { test_unlink_noerrno(); test_get_files_in_directory(); @@ -562,6 +571,7 @@ int main(int argc, char *argv[]) { test_access_fd(); test_touch_file(); test_unlinkat_deallocate(); + test_fsync_directory_of_file(); return 0; } From 8ac2f74fb6bb798db1fb3f7a7bf97f2579e963c2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 18:24:36 +0100 Subject: [PATCH 07/11] tree-wide: make use of fsync_directory_of_file() all over the place Let's make use this at various places we call fsync(), to make things fully reliable, as the kernel devs suggest to first fsync() files and then fsync() the directories they are located in. --- src/basic/fileio.c | 4 ++++ src/boot/bootctl.c | 5 +++-- src/coredump/coredump.c | 2 ++ src/libsystemd/sd-id128/id128-util.c | 7 ++++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/basic/fileio.c b/src/basic/fileio.c index 26d6174664e..29b941348aa 100644 --- a/src/basic/fileio.c +++ b/src/basic/fileio.c @@ -1187,6 +1187,10 @@ int fflush_sync_and_check(FILE *f) { if (fsync(fileno(f)) < 0) return -errno; + r = fsync_directory_of_file(fileno(f)); + if (r < 0) + return r; + return 0; } diff --git a/src/boot/bootctl.c b/src/boot/bootctl.c index ae034f5cdb1..37a5f1d6341 100644 --- a/src/boot/bootctl.c +++ b/src/boot/bootctl.c @@ -434,12 +434,13 @@ static int copy_file_with_version_check(const char *from, const char *to, bool f (void) copy_times(fd_from, fd_to); - r = fsync(fd_to); - if (r < 0) { + if (fsync(fd_to) < 0) { (void) unlink_noerrno(t); return log_error_errno(errno, "Failed to copy data from \"%s\" to \"%s\": %m", from, t); } + (void) fsync_directory_of_file(fd_to); + r = renameat(AT_FDCWD, t, AT_FDCWD, to); if (r < 0) { (void) unlink_noerrno(t); diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 6900af9fb50..e924750d1b4 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -260,6 +260,8 @@ static int fix_permissions( if (fsync(fd) < 0) return log_error_errno(errno, "Failed to sync coredump %s: %m", coredump_tmpfile_name(filename)); + (void) fsync_directory_of_file(fd); + r = link_tmpfile(fd, filename, target); if (r < 0) return log_error_errno(r, "Failed to move coredump %s into place: %m", target); diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c index a6e38578b16..8ce012e35fa 100644 --- a/src/libsystemd/sd-id128/id128-util.c +++ b/src/libsystemd/sd-id128/id128-util.c @@ -23,6 +23,7 @@ #include #include "fd-util.h" +#include "fs-util.h" #include "hexdecoct.h" #include "id128-util.h" #include "io-util.h" @@ -180,9 +181,13 @@ int id128_write_fd(int fd, Id128Format f, sd_id128_t id, bool do_sync) { if (do_sync) { if (fsync(fd) < 0) return -errno; + + r = fsync_directory_of_file(fd); + if (r < 0) + return r; } - return r; + return 0; } int id128_write(const char *p, Id128Format f, sd_id128_t id, bool do_sync) { From 7f7210c2102025500a68cfd42f31a4d97e4b0c6f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 19 Feb 2018 18:37:50 +0100 Subject: [PATCH 08/11] io-util: add an unlikely decorator for a test that should never hold --- src/basic/io-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/basic/io-util.c b/src/basic/io-util.c index 08ad42ed952..a6e34cb7625 100644 --- a/src/basic/io-util.c +++ b/src/basic/io-util.c @@ -137,7 +137,7 @@ int loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) { assert(fd >= 0); assert(buf); - if (nbytes > (size_t) SSIZE_MAX) + if (_unlikely_(nbytes > (size_t) SSIZE_MAX)) return -EINVAL; do { From 8fc58f1ad3c32903b1747016a46069dc94f60d41 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 20 Feb 2018 11:40:07 +0100 Subject: [PATCH 09/11] journal-file: fix typo in log message --- src/journal/journal-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 98201236603..707cfd8a0bb 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -3677,7 +3677,7 @@ void journal_default_metrics(JournalMetrics *m, int fd) { if (fstatvfs(fd, &ss) >= 0) fs_size = ss.f_frsize * ss.f_blocks; else { - log_debug_errno(errno, "Failed to detremine disk size: %m"); + log_debug_errno(errno, "Failed to determine disk size: %m"); fs_size = 0; } From 1133dea477b461a4ceaa8f9ceb58f536edccd8ec Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 20 Feb 2018 12:46:14 +0100 Subject: [PATCH 10/11] xattr-util: support AT_EMPTY_PATH in fgetxattrat_fake() Let's expose fstatat() like behaviour if AT_EMPTY_PATH is defined. Also, check the specified flags returning EINVAL on the flags we don't emulate. --- src/basic/xattr-util.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c index 12d04eeffb4..6f568ff0742 100644 --- a/src/basic/xattr-util.c +++ b/src/basic/xattr-util.c @@ -31,6 +31,7 @@ #include "macro.h" #include "sparse-endian.h" #include "stdio-util.h" +#include "string-util.h" #include "time-util.h" #include "xattr-util.h" @@ -111,11 +112,21 @@ ssize_t fgetxattrat_fake(int dirfd, const char *filename, const char *attribute, /* The kernel doesn't have a fgetxattrat() command, hence let's emulate one */ - fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0)); - if (fd < 0) - return -errno; + if (flags & ~(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH)) + return -EINVAL; - xsprintf(fn, "/proc/self/fd/%i", fd); + if (isempty(filename)) { + if (!(flags & AT_EMPTY_PATH)) + return -EINVAL; + + xsprintf(fn, "/proc/self/fd/%i", dirfd); + } else { + fd = openat(dirfd, filename, O_CLOEXEC|O_PATH|(flags & AT_SYMLINK_NOFOLLOW ? O_NOFOLLOW : 0)); + if (fd < 0) + return -errno; + + xsprintf(fn, "/proc/self/fd/%i", fd); + } l = getxattr(fn, attribute, value, size); if (l < 0) From 4c2e1b399f5809f3ccad45fffd23d208ad9e8fa9 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 20 Feb 2018 12:48:33 +0100 Subject: [PATCH 11/11] xattr-util: use crtime/btime if statx() is available for implementation of fd_setcrtime() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Linux kernel exposes the birth time now for files through statx() hence make use of it where available. We keep the xattr logic in place for this however, since only a subset of file systems on Linux currently expose the birth time. NFS and tmpfs for example do not support it. OTOH there are other file systems that do support the birth time but might not support xattrs (smb…), hence make the best of the two, in particular in order to deal with journal files copied between file system types and to maintain compatibility with older file systems that are updated to newer version of the file system. --- meson.build | 6 +++ src/basic/missing.h | 41 ++++++++++++++++++ src/basic/missing_syscall.h | 23 ++++++++++ src/basic/xattr-util.c | 84 ++++++++++++++++++++++--------------- src/journal/journal-file.c | 17 +++----- src/test/test-xattr-util.c | 35 ++++++++++++++++ 6 files changed, 161 insertions(+), 45 deletions(-) diff --git a/meson.build b/meson.build index 6f1c44f8ca7..d4af95a44a9 100644 --- a/meson.build +++ b/meson.build @@ -449,6 +449,8 @@ decl_headers = ''' #include #include #include +#include +#include ''' # FIXME: key_serial_t is only defined in keyutils.h, this is bound to fail @@ -457,6 +459,7 @@ foreach decl : ['char16_t', 'key_serial_t', 'struct ethtool_link_settings', 'struct fib_rule_uid_range', + 'struct statx', ] # We get -1 if the size cannot be determined @@ -519,6 +522,9 @@ foreach ident : [ ['bpf', '''#include #include '''], ['explicit_bzero' , '''#include '''], + ['statx', '''#include + #include + #include '''], ] have = cc.has_function(ident[0], prefix : ident[1], args : '-D_GNU_SOURCE') diff --git a/src/basic/missing.h b/src/basic/missing.h index 2f68e3ae8a5..1cc3f08e489 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -34,10 +34,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -1372,4 +1374,43 @@ struct fib_rule_uid_range { #define PF_KTHREAD 0x00200000 #endif +#if ! HAVE_STRUCT_STATX +struct statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + uint32_t __reserved; +}; +struct statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint64_t stx_attributes; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint16_t __spare0[1]; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + uint64_t stx_attributes_mask; + struct statx_timestamp stx_atime; + struct statx_timestamp stx_btime; + struct statx_timestamp stx_ctime; + struct statx_timestamp stx_mtime; + uint32_t stx_rdev_major; + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; + uint32_t stx_dev_minor; + uint64_t __spare2[14]; +}; +#endif + +#ifndef STATX_BTIME +#define STATX_BTIME 0x00000800U +#endif + +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_DONT_SYNC 0x4000 +#endif + #include "missing_syscall.h" diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h index c938d0d9767..2c4a87f31b1 100644 --- a/src/basic/missing_syscall.h +++ b/src/basic/missing_syscall.h @@ -383,3 +383,26 @@ static inline int bpf(int cmd, union bpf_attr *attr, size_t size) { # endif # endif #endif + +#if !HAVE_STATX +# ifndef __NR_statx +# if defined __i386__ +# define __NR_bpf 383 +# elif defined __x86_64__ +# define __NR_bpf 332 +# else +# warning "__NR_statx not defined for your architecture" +# endif +# endif + +struct statx; + +static inline ssize_t statx(int dfd, const char *filename, unsigned flags, unsigned int mask, struct statx *buffer) { +# ifdef __NR_statx + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +# else + errno = ENOSYS; + return -1; +# endif +} +#endif diff --git a/src/basic/xattr-util.c b/src/basic/xattr-util.c index 6f568ff0742..d806b550d8a 100644 --- a/src/basic/xattr-util.c +++ b/src/basic/xattr-util.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include "alloc-util.h" #include "fd-util.h" #include "macro.h" +#include "missing.h" #include "sparse-endian.h" #include "stdio-util.h" #include "string-util.h" @@ -148,52 +150,66 @@ static int parse_crtime(le64_t le, usec_t *usec) { return 0; } -int fd_getcrtime(int fd, usec_t *usec) { +int fd_getcrtime_at(int dirfd, const char *name, usec_t *ret, int flags) { + struct statx sx; + usec_t a, b; le64_t le; ssize_t n; + int r; - assert(fd >= 0); - assert(usec); + assert(ret); - /* Until Linux gets a real concept of birthtime/creation time, - * let's fake one with xattrs */ + if (flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW)) + return -EINVAL; - n = fgetxattr(fd, "user.crtime_usec", &le, sizeof(le)); - if (n < 0) - return -errno; - if (n != sizeof(le)) - return -EIO; + /* So here's the deal: the creation/birth time (crtime/btime) of a file is a relatively newly supported concept + * on Linux (or more strictly speaking: a concept that only recently got supported in the API, it was + * implemented on various file systems on the lower level since a while, but never was accessible). However, we + * needed a concept like that for vaccuuming algorithms and such, hence we emulated it via a user xattr for a + * long time. Starting with Linux 4.11 there's statx() which exposes the timestamp to userspace for the first + * time, where it is available. Thius function will read it, but it tries to keep some compatibility with older + * systems: we try to read both the crtime/btime and the xattr, and then use whatever is older. After all the + * concept is useful for determining how "old" a file really is, and hence using the older of the two makes + * most sense. */ - return parse_crtime(le, usec); -} - -int fd_getcrtime_at(int dirfd, const char *name, usec_t *usec, int flags) { - le64_t le; - ssize_t n; + if (statx(dirfd, strempty(name), flags|AT_STATX_DONT_SYNC, STATX_BTIME, &sx) >= 0 && + (sx.stx_mask & STATX_BTIME) && + sx.stx_btime.tv_sec != 0) + a = (usec_t) sx.stx_btime.tv_sec * USEC_PER_SEC + + (usec_t) sx.stx_btime.tv_nsec / NSEC_PER_USEC; + else + a = USEC_INFINITY; n = fgetxattrat_fake(dirfd, name, "user.crtime_usec", &le, sizeof(le), flags); if (n < 0) - return -errno; - if (n != sizeof(le)) - return -EIO; + r = -errno; + else if (n != sizeof(le)) + r = -EIO; + else + r = parse_crtime(le, &b); + if (r < 0) { + if (a != USEC_INFINITY) { + *ret = a; + return 0; + } - return parse_crtime(le, usec); + return r; + } + + if (a != USEC_INFINITY) + *ret = MIN(a, b); + else + *ret = b; + + return 0; } -int path_getcrtime(const char *p, usec_t *usec) { - le64_t le; - ssize_t n; +int fd_getcrtime(int fd, usec_t *ret) { + return fd_getcrtime_at(fd, NULL, ret, AT_EMPTY_PATH); +} - assert(p); - assert(usec); - - n = getxattr(p, "user.crtime_usec", &le, sizeof(le)); - if (n < 0) - return -errno; - if (n != sizeof(le)) - return -EIO; - - return parse_crtime(le, usec); +int path_getcrtime(const char *p, usec_t *ret) { + return fd_getcrtime_at(AT_FDCWD, p, ret, 0); } int fd_setcrtime(int fd, usec_t usec) { @@ -201,7 +217,7 @@ int fd_setcrtime(int fd, usec_t usec) { assert(fd >= 0); - if (usec <= 0) + if (IN_SET(usec, 0, USEC_INFINITY)) usec = now(CLOCK_REALTIME); le = htole64((uint64_t) usec); diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index 707cfd8a0bb..5643c0578d6 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -3316,17 +3316,12 @@ int journal_file_open( (void) journal_file_warn_btrfs(f); - /* Let's attach the creation time to the journal file, - * so that the vacuuming code knows the age of this - * file even if the file might end up corrupted one - * day... Ideally we'd just use the creation time many - * file systems maintain for each file, but there is - * currently no usable API to query this, hence let's - * emulate this via extended attributes. If extended - * attributes are not supported we'll just skip this, - * and rely solely on mtime/atime/ctime of the file. */ - - fd_setcrtime(f->fd, 0); + /* Let's attach the creation time to the journal file, so that the vacuuming code knows the age of this + * file even if the file might end up corrupted one day... Ideally we'd just use the creation time many + * file systems maintain for each file, but the API to query this is very new, hence let's emulate this + * via extended attributes. If extended attributes are not supported we'll just skip this, and rely + * solely on mtime/atime/ctime of the file. */ + (void) fd_setcrtime(f->fd, 0); #if HAVE_GCRYPT /* Try to load the FSPRG state, and if we can't, then diff --git a/src/test/test-xattr-util.c b/src/test/test-xattr-util.c index 01c371a39c8..17087a282db 100644 --- a/src/test/test-xattr-util.c +++ b/src/test/test-xattr-util.c @@ -26,6 +26,7 @@ #include "alloc-util.h" #include "fd-util.h" +#include "fileio.h" #include "fs-util.h" #include "macro.h" #include "string-util.h" @@ -63,8 +64,42 @@ cleanup: assert_se(rmdir(t) >= 0); } +static void test_getcrtime(void) { + + _cleanup_close_ int fd = -1; + char ts[FORMAT_TIMESTAMP_MAX]; + const char *vt; + usec_t usec, k; + int r; + + assert_se(tmp_dir(&vt) >= 0); + + fd = open_tmpfile_unlinkable(vt, O_RDWR); + assert_se(fd >= 0); + + r = fd_getcrtime(fd, &usec); + if (r < 0) + log_debug_errno(r, "btime: %m"); + else + log_debug("btime: %s", format_timestamp(ts, sizeof(ts), usec)); + + k = now(CLOCK_REALTIME); + + r = fd_setcrtime(fd, 1519126446UL * USEC_PER_SEC); + if (!IN_SET(r, -EOPNOTSUPP, -ENOTTY)) { + assert_se(fd_getcrtime(fd, &usec) >= 0); + assert_se(k < 1519126446UL * USEC_PER_SEC || + usec == 1519126446UL * USEC_PER_SEC); + } +} + int main(void) { + log_set_max_level(LOG_DEBUG); + log_parse_environment(); + log_open(); + test_fgetxattrat_fake(); + test_getcrtime(); return 0; }