diff --git a/src/basic/copy.c b/src/basic/copy.c index 0673ecd4a2d..a9e1a87622a 100644 --- a/src/basic/copy.c +++ b/src/basic/copy.c @@ -239,7 +239,7 @@ static int fd_copy_regular( r = copy_bytes(fdf, fdt, (uint64_t) -1, copy_flags); if (r < 0) { - unlinkat(dt, to, 0); + (void) unlinkat(dt, to, 0); return r; } @@ -261,7 +261,7 @@ static int fd_copy_regular( if (q < 0) { r = -errno; - unlinkat(dt, to, 0); + (void) unlinkat(dt, to, 0); } return r; @@ -525,7 +525,7 @@ int copy_file(const char *from, const char *to, int flags, mode_t mode, unsigned r = copy_file_fd(from, fdt, copy_flags); if (r < 0) { close(fdt); - unlink(to); + (void) unlink(to); return r; } diff --git a/src/basic/fs-util.c b/src/basic/fs-util.c index cf856ae8e2c..c96c7d0d25c 100644 --- a/src/basic/fs-util.c +++ b/src/basic/fs-util.c @@ -887,3 +887,72 @@ int access_fd(int fd, int mode) { return r; } + +int unlinkat_deallocate(int fd, const char *name, int flags) { + _cleanup_close_ int truncate_fd = -1; + struct stat st; + off_t l, bs; + + /* Operates like unlinkat() but also deallocates the file contents if it is a regular file and there's no other + * link to it. This is useful to ensure that other processes that might have the file open for reading won't be + * able to keep the data pinned on disk forever. This call is particular useful whenever we execute clean-up + * jobs ("vacuuming"), where we want to make sure the data is really gone and the disk space released and + * returned to the free pool. + * + * Deallocation is preferably done by FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE (👊) if supported, which means + * the file won't change size. That's a good thing since we shouldn't needlessly trigger SIGBUS in other + * programs that have mmap()ed the file. (The assumption here is that changing file contents to all zeroes + * underneath those programs is the better choice than simply triggering SIGBUS in them which truncation does.) 
+ * However if hole punching is not implemented in the kernel or file system we'll fall back to normal file + * truncation (🔪), as our goal of deallocating the data space trumps our goal of being nice to readers (💐). + * + * Note that we attempt deallocation, but failure to succeed with that is not considered fatal, as long as the + * primary job – to delete the file – is accomplished. */ + + if ((flags & AT_REMOVEDIR) == 0) { + truncate_fd = openat(fd, name, O_WRONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK); + if (truncate_fd < 0) { + + /* If this failed because the file doesn't exist propagate the error right-away. Also, + * AT_REMOVEDIR wasn't set, and we tried to open the file for writing, which means EISDIR is + * returned when this is a directory but we are not supposed to delete those, hence propagate + * the error right-away too. */ + if (IN_SET(errno, ENOENT, EISDIR)) + return -errno; + + if (errno != ELOOP) /* don't complain if this is a symlink */ + log_debug_errno(errno, "Failed to open file '%s' for deallocation, ignoring: %m", name); + } + } + + if (unlinkat(fd, name, flags) < 0) + return -errno; + + if (truncate_fd < 0) /* Don't have a file handle, can't do more ☹️ */ + return 0; + + if (fstat(truncate_fd, &st) < 0) { + log_debug_errno(errno, "Failed to stat file '%s' for deallocation, ignoring: %m", name); + return 0; + } + + if (!S_ISREG(st.st_mode) || st.st_blocks == 0 || st.st_nlink > 0) + return 0; + + /* If this is a regular file, it actually took up space on disk and there are no other links it's time to + * punch-hole/truncate this to release the disk space. */ + + bs = MAX(st.st_blksize, 512); + l = DIV_ROUND_UP(st.st_size, bs) * bs; /* Round up to next block size */ + + if (fallocate(truncate_fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 0, l) >= 0) + return 0; /* Successfully punched a hole! 
😊 */ + + /* Fall back to truncation */ + if (ftruncate(truncate_fd, 0) < 0) { + log_debug_errno(errno, "Failed to truncate file to 0, ignoring: %m"); + return 0; + } + + return 0; +} diff --git a/src/basic/fs-util.h b/src/basic/fs-util.h index 6df3ebffe29..ae40d6d37fd 100644 --- a/src/basic/fs-util.h +++ b/src/basic/fs-util.h @@ -103,3 +103,5 @@ static inline void unlink_and_free(char *p) { DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); int access_fd(int fd, int mode); + +int unlinkat_deallocate(int fd, const char *name, int flags); diff --git a/src/basic/hash-funcs.c b/src/basic/hash-funcs.c index 5267758769c..947bcfd5843 100644 --- a/src/basic/hash-funcs.c +++ b/src/basic/hash-funcs.c @@ -22,6 +22,7 @@ #include #include "hash-funcs.h" +#include "path-util.h" void string_hash_func(const void *p, struct siphash *state) { siphash24_compress(p, strlen(p) + 1, state); @@ -36,6 +37,55 @@ const struct hash_ops string_hash_ops = { .compare = string_compare_func }; + +void path_hash_func(const void *p, struct siphash *state) { + const char *q = p; + size_t n; + + assert(q); + assert(state); + + /* Calculates a hash for a path in a way this duplicate inner slashes don't make a differences, and also + * whether there's a trailing slash or not. This fits well with the semantics of path_compare(), which does + * similar checks and also doesn't care for trailing slashes. Note that relative and absolute paths (i.e. those + * which begin in a slash or not) will hash differently though. */ + + n = strspn(q, "/"); + if (n > 0) { /* Eat up initial slashes, and add one "/" to the hash for all of them */ + siphash24_compress(q, 1, state); + q += n; + } + + for (;;) { + /* Determine length of next component */ + n = strcspn(q, "/"); + if (n == 0) /* Reached the end? */ + break; + + /* Add this component to the hash and skip over it */ + siphash24_compress(q, n, state); + q += n; + + /* How many slashes follow this component? 
*/ + n = strspn(q, "/"); + if (q[n] == 0) /* Is this a trailing slash? If so, we are at the end, and don't care about the slashes anymore */ + break; + + /* We are not add the end yet. Hash exactly one slash for all of the ones we just encountered. */ + siphash24_compress(q, 1, state); + q += n; + } +} + +int path_compare_func(const void *a, const void *b) { + return path_compare(a, b); +} + +const struct hash_ops path_hash_ops = { + .hash = path_hash_func, + .compare = path_compare_func +}; + void trivial_hash_func(const void *p, struct siphash *state) { siphash24_compress(&p, sizeof(p), state); } diff --git a/src/basic/hash-funcs.h b/src/basic/hash-funcs.h index 959e2c101d3..945b4c251c7 100644 --- a/src/basic/hash-funcs.h +++ b/src/basic/hash-funcs.h @@ -36,29 +36,28 @@ void string_hash_func(const void *p, struct siphash *state); int string_compare_func(const void *a, const void *b) _pure_; extern const struct hash_ops string_hash_ops; -/* This will compare the passed pointers directly, and will not - * dereference them. This is hence not useful for strings or - * suchlike. */ +void path_hash_func(const void *p, struct siphash *state); +int path_compare_func(const void *a, const void *b) _pure_; +extern const struct hash_ops path_hash_ops; + +/* This will compare the passed pointers directly, and will not dereference them. This is hence not useful for strings + * or suchlike. */ void trivial_hash_func(const void *p, struct siphash *state); int trivial_compare_func(const void *a, const void *b) _const_; extern const struct hash_ops trivial_hash_ops; -/* 32bit values we can always just embed in the pointer itself, but - * in order to support 32bit archs we need store 64bit values - * indirectly, since they don't fit in a pointer. */ +/* 32bit values we can always just embed in the pointer itself, but in order to support 32bit archs we need store 64bit + * values indirectly, since they don't fit in a pointer. 
*/ void uint64_hash_func(const void *p, struct siphash *state); int uint64_compare_func(const void *a, const void *b) _pure_; extern const struct hash_ops uint64_hash_ops; -/* On some archs dev_t is 32bit, and on others 64bit. And sometimes - * it's 64bit on 32bit archs, and sometimes 32bit on 64bit archs. Yuck! */ +/* On some archs dev_t is 32bit, and on others 64bit. And sometimes it's 64bit on 32bit archs, and sometimes 32bit on + * 64bit archs. Yuck! */ #if SIZEOF_DEV_T != 8 void devt_hash_func(const void *p, struct siphash *state) _pure_; int devt_compare_func(const void *a, const void *b) _pure_; -extern const struct hash_ops devt_hash_ops = { - .hash = devt_hash_func, - .compare = devt_compare_func -}; +extern const struct hash_ops devt_hash_ops; #else #define devt_hash_func uint64_hash_func #define devt_compare_func uint64_compare_func diff --git a/src/basic/missing.h b/src/basic/missing.h index 9d4d08e7a9f..ed884dddad7 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -517,6 +518,10 @@ struct btrfs_ioctl_quota_ctl_args { #define BPF_FS_MAGIC 0xcafe4a11 #endif +#ifndef OCFS2_SUPER_MAGIC +#define OCFS2_SUPER_MAGIC 0x7461636f +#endif + #ifndef MS_MOVE #define MS_MOVE 8192 #endif @@ -1355,4 +1360,12 @@ struct fib_rule_uid_range { #define NS_GET_NSTYPE _IO(0xb7, 0x3) #endif +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif + +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 +#endif + #include "missing_syscall.h" diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c index a8947cefc2e..b1243456597 100644 --- a/src/basic/mount-util.c +++ b/src/basic/mount-util.c @@ -426,7 +426,7 @@ int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **bl path_kill_slashes(cleaned); - done = set_new(&string_hash_ops); + done = set_new(&path_hash_ops); if (!done) return -ENOMEM; @@ -436,7 +436,7 @@ int 
bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **bl char *x; unsigned long orig_flags; - todo = set_new(&string_hash_ops); + todo = set_new(&path_hash_ops); if (!todo) return -ENOMEM; diff --git a/src/basic/rm-rf.h b/src/basic/rm-rf.h index ad63e9be407..6e63d3feab5 100644 --- a/src/basic/rm-rf.h +++ b/src/basic/rm-rf.h @@ -25,10 +25,10 @@ #include "util.h" typedef enum RemoveFlags { - REMOVE_ONLY_DIRECTORIES = 1, - REMOVE_ROOT = 2, - REMOVE_PHYSICAL = 4, /* if not set, only removes files on tmpfs, never physical file systems */ - REMOVE_SUBVOLUME = 8, + REMOVE_ONLY_DIRECTORIES = 1 << 0, + REMOVE_ROOT = 1 << 1, + REMOVE_PHYSICAL = 1 << 2, /* if not set, only removes files on tmpfs, never physical file systems */ + REMOVE_SUBVOLUME = 1 << 3, } RemoveFlags; int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev); diff --git a/src/basic/stat-util.c b/src/basic/stat-util.c index 3a54103f1be..0fb6750a075 100644 --- a/src/basic/stat-util.c +++ b/src/basic/stat-util.c @@ -214,8 +214,19 @@ int path_is_fs_type(const char *path, statfs_f_type_t magic_value) { } bool is_temporary_fs(const struct statfs *s) { - return is_fs_type(s, TMPFS_MAGIC) || - is_fs_type(s, RAMFS_MAGIC); + return is_fs_type(s, TMPFS_MAGIC) || + is_fs_type(s, RAMFS_MAGIC); +} + +bool is_network_fs(const struct statfs *s) { + return is_fs_type(s, CIFS_MAGIC_NUMBER) || + is_fs_type(s, CODA_SUPER_MAGIC) || + is_fs_type(s, NCP_SUPER_MAGIC) || + is_fs_type(s, NFS_SUPER_MAGIC) || + is_fs_type(s, SMB_SUPER_MAGIC) || + is_fs_type(s, V9FS_MAGIC) || + is_fs_type(s, AFS_SUPER_MAGIC) || + is_fs_type(s, OCFS2_SUPER_MAGIC); } int fd_is_temporary_fs(int fd) { @@ -227,15 +238,25 @@ int fd_is_temporary_fs(int fd) { return is_temporary_fs(&s); } +int fd_is_network_fs(int fd) { + struct statfs s; + + if (fstatfs(fd, &s) < 0) + return -errno; + + return is_network_fs(&s); +} + int fd_is_network_ns(int fd) { int r; r = fd_is_fs_type(fd, NSFS_MAGIC); if (r <= 0) return r; - r = ioctl(fd, 
NS_GET_NSTYPE); - if (r < 0) + + r = ioctl(fd, NS_GET_NSTYPE); /* yields the CLONE_NEW* type of the namespace; must be kept for the comparison below */ + if (r < 0) return -errno; return r == CLONE_NEWNET; } diff --git a/src/basic/stat-util.h b/src/basic/stat-util.h index d8d3c204960..da33e68db25 100644 --- a/src/basic/stat-util.h +++ b/src/basic/stat-util.h @@ -61,8 +61,13 @@ int fd_is_fs_type(int fd, statfs_f_type_t magic_value); int path_is_fs_type(const char *path, statfs_f_type_t magic_value); bool is_temporary_fs(const struct statfs *s) _pure_; +bool is_network_fs(const struct statfs *s) _pure_; + int fd_is_temporary_fs(int fd); +int fd_is_network_fs(int fd); + int fd_is_network_ns(int fd); + int path_is_temporary_fs(const char *path); /* Because statfs.t_type can be int on some architectures, we have to cast diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c index 1a73fb099df..413946182c7 100644 --- a/src/cgtop/cgtop.c +++ b/src/cgtop/cgtop.c @@ -928,8 +928,8 @@ int main(int argc, char *argv[]) { } else log_debug("Cgroup path: %s", root); - a = hashmap_new(&string_hash_ops); - b = hashmap_new(&string_hash_ops); + a = hashmap_new(&path_hash_ops); + b = hashmap_new(&path_hash_ops); if (!a || !b) { r = log_oom(); goto finish; diff --git a/src/core/device.c b/src/core/device.c index a43664d3bd7..b0dd469fd14 100644 --- a/src/core/device.c +++ b/src/core/device.c @@ -77,7 +77,7 @@ static int device_set_sysfs(Device *d, const char *sysfs) { if (streq_ptr(d->sysfs, sysfs)) return 0; - r = hashmap_ensure_allocated(&UNIT(d)->manager->devices_by_sysfs, &string_hash_ops); + r = hashmap_ensure_allocated(&UNIT(d)->manager->devices_by_sysfs, &path_hash_ops); if (r < 0) return r; diff --git a/src/core/manager.c b/src/core/manager.c index a2a3eea2f52..08aee317519 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -731,7 +731,7 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) { if (r < 0) goto fail; - r = hashmap_ensure_allocated(&m->cgroup_unit, &string_hash_ops); + r = hashmap_ensure_allocated(&m->cgroup_unit, &path_hash_ops); if 
(r < 0) goto fail; @@ -1314,7 +1314,7 @@ static void manager_build_unit_path_cache(Manager *m) { set_free_free(m->unit_path_cache); - m->unit_path_cache = set_new(&string_hash_ops); + m->unit_path_cache = set_new(&path_hash_ops); if (!m->unit_path_cache) { r = -ENOMEM; goto fail; diff --git a/src/core/mount.c b/src/core/mount.c index 914458f8e6d..7be4e89a614 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -1837,7 +1837,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, mount->parameters_proc_self_mountinfo.what) { /* Remember that this device might just have disappeared */ - if (set_ensure_allocated(&gone, &string_hash_ops) < 0 || + if (set_ensure_allocated(&gone, &path_hash_ops) < 0 || set_put(gone, mount->parameters_proc_self_mountinfo.what) < 0) log_oom(); /* we don't care too much about OOM here... */ } @@ -1892,7 +1892,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, mount->from_proc_self_mountinfo && mount->parameters_proc_self_mountinfo.what) { - if (set_ensure_allocated(&around, &string_hash_ops) < 0 || + if (set_ensure_allocated(&around, &path_hash_ops) < 0 || set_put(around, mount->parameters_proc_self_mountinfo.what) < 0) log_oom(); } diff --git a/src/core/swap.c b/src/core/swap.c index fffd8d4627b..37c97bc14e1 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -85,7 +85,7 @@ static int swap_set_devnode(Swap *s, const char *devnode) { assert(s); - r = hashmap_ensure_allocated(&UNIT(s)->manager->swaps_by_devnode, &string_hash_ops); + r = hashmap_ensure_allocated(&UNIT(s)->manager->swaps_by_devnode, &path_hash_ops); if (r < 0) return r; diff --git a/src/core/unit.c b/src/core/unit.c index 9a57bcfb4b3..32119171fea 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4594,7 +4594,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) if (!path_is_absolute(path)) return -EINVAL; - r = hashmap_ensure_allocated(&u->requires_mounts_for, 
&string_hash_ops); + r = hashmap_ensure_allocated(&u->requires_mounts_for, &path_hash_ops); if (r < 0) return r; @@ -4631,7 +4631,7 @@ int unit_require_mounts_for(Unit *u, const char *path, UnitDependencyMask mask) if (!x) { char *q; - r = hashmap_ensure_allocated(&u->manager->units_requiring_mounts_for, &string_hash_ops); + r = hashmap_ensure_allocated(&u->manager->units_requiring_mounts_for, &path_hash_ops); if (r < 0) return r; diff --git a/src/coredump/coredump-vacuum.c b/src/coredump/coredump-vacuum.c index aede180b43a..e27512167c2 100644 --- a/src/coredump/coredump-vacuum.c +++ b/src/coredump/coredump-vacuum.c @@ -24,6 +24,7 @@ #include "coredump-vacuum.h" #include "dirent-util.h" #include "fd-util.h" +#include "fs-util.h" #include "hashmap.h" #include "macro.h" #include "string-util.h" @@ -247,14 +248,13 @@ int coredump_vacuum(int exclude_fd, uint64_t keep_free, uint64_t max_use) { if (r <= 0) return r; - if (unlinkat(dirfd(d), worst->oldest_file, 0) < 0) { + r = unlinkat_deallocate(dirfd(d), worst->oldest_file, 0); + if (r == -ENOENT) + continue; + if (r < 0) + return log_error_errno(r, "Failed to remove file %s: %m", worst->oldest_file); - if (errno == ENOENT) - continue; - - return log_error_errno(errno, "Failed to remove file %s: %m", worst->oldest_file); - } else - log_info("Removed old coredump %s.", worst->oldest_file); + log_info("Removed old coredump %s.", worst->oldest_file); } return 0; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index c5cfa3d878a..67abf8da498 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -122,6 +122,8 @@ typedef struct JournalFile { pthread_t offline_thread; volatile OfflineState offline_state; + unsigned last_seen_generation; + #if HAVE_XZ || HAVE_LZ4 void *compress_buffer; size_t compress_buffer_size; diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h index d0d2842cc47..e5f563cced0 100644 --- a/src/journal/journal-internal.h +++ 
b/src/journal/journal-internal.h @@ -80,6 +80,7 @@ struct Directory { char *path; int wd; bool is_root; + unsigned last_seen_generation; }; struct sd_journal { @@ -104,6 +105,7 @@ struct sd_journal { int inotify_fd; unsigned current_invalidate_counter, last_invalidate_counter; usec_t last_process_usec; + unsigned generation; /* Iterating through unique fields and their data values */ char *unique_field; diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c index c21e87858aa..db36a6ab804 100644 --- a/src/journal/journal-vacuum.c +++ b/src/journal/journal-vacuum.c @@ -27,6 +27,7 @@ #include "alloc-util.h" #include "dirent-util.h" #include "fd-util.h" +#include "fs-util.h" #include "journal-def.h" #include "journal-file.h" #include "journal-vacuum.h" @@ -278,14 +279,15 @@ int journal_directory_vacuum( if (r > 0) { /* Always vacuum empty non-online files. */ - if (unlinkat(dirfd(d), p, 0) >= 0) { + r = unlinkat_deallocate(dirfd(d), p, 0); + if (r >= 0) { log_full(verbose ? LOG_INFO : LOG_DEBUG, "Deleted empty archived journal %s/%s (%s).", directory, p, format_bytes(sbytes, sizeof(sbytes), size)); freed += size; - } else if (errno != ENOENT) - log_warning_errno(errno, "Failed to delete empty archived journal %s/%s: %m", directory, p); + } else if (r != -ENOENT) + log_warning_errno(r, "Failed to delete empty archived journal %s/%s: %m", directory, p); continue; } @@ -321,7 +323,8 @@ int journal_directory_vacuum( (n_max_files <= 0 || left <= n_max_files)) break; - if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) { + r = unlinkat_deallocate(dirfd(d), list[i].filename, 0); + if (r >= 0) { log_full(verbose ? 
LOG_INFO : LOG_DEBUG, "Deleted archived journal %s/%s (%s).", directory, list[i].filename, format_bytes(sbytes, sizeof(sbytes), list[i].usage)); freed += list[i].usage; @@ -330,8 +333,8 @@ int journal_directory_vacuum( else sum = 0; - } else if (errno != ENOENT) - log_warning_errno(errno, "Failed to delete archived journal %s/%s: %m", directory, list[i].filename); + } else if (r != -ENOENT) + log_warning_errno(r, "Failed to delete archived journal %s/%s: %m", directory, list[i].filename); } if (oldest_usec && i < n_list && (*oldest_usec == 0 || list[i].realtime < *oldest_usec)) diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c index bb9cfb6dc4d..0aa4c1f7724 100644 --- a/src/journal/journalctl.c +++ b/src/journal/journalctl.c @@ -81,6 +81,8 @@ #define DEFAULT_FSS_INTERVAL_USEC (15*USEC_PER_MINUTE) +#define PROCESS_INOTIFY_INTERVAL 1024 /* Every 1,024 messages processed */ + #if HAVE_PCRE2 DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_match_data*, pcre2_match_data_free); DEFINE_TRIVIAL_CLEANUP_FUNC(pcre2_code*, pcre2_code_free); @@ -2639,6 +2641,20 @@ int main(int argc, char *argv[]) { goto finish; n_shown++; + + /* If journalctl take a long time to process messages, and during that time journal file + * rotation occurs, a journalctl client will keep those rotated files open until it calls + * sd_journal_process(), which typically happens as a result of calling sd_journal_wait() below + * in the "following" case. By periodically calling sd_journal_process() during the processing + * loop we shrink the window of time a client instance has open file descriptors for rotated + * (deleted) journal files. 
*/ + if ((n_shown % PROCESS_INOTIFY_INTERVAL) == 0) { + r = sd_journal_process(j); + if (r < 0) { + log_error_errno(r, "Failed to process inotify events: %m"); + goto finish; + } + } } if (!arg_follow) { diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index a8812c9af83..4deee461c3f 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -40,6 +40,7 @@ #include "fs-util.h" #include "hashmap.h" #include "hostname-util.h" +#include "id128-util.h" #include "io-util.h" #include "journal-def.h" #include "journal-file.h" @@ -51,6 +52,7 @@ #include "process-util.h" #include "replace-var.h" #include "stat-util.h" +#include "stat-util.h" #include "stdio-util.h" #include "string-util.h" #include "strv.h" @@ -1139,7 +1141,6 @@ _public_ int sd_journal_test_cursor(sd_journal *j, const char *cursor) { return 1; } - _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, uint64_t usec) { assert_return(j, -EINVAL); assert_return(!journal_pid_changed(j), -ECHILD); @@ -1186,22 +1187,12 @@ _public_ int sd_journal_seek_tail(sd_journal *j) { } static void check_network(sd_journal *j, int fd) { - struct statfs sfs; - assert(j); if (j->on_network) return; - if (fstatfs(fd, &sfs) < 0) - return; - - j->on_network = - F_TYPE_EQUAL(sfs.f_type, CIFS_MAGIC_NUMBER) || - F_TYPE_EQUAL(sfs.f_type, CODA_SUPER_MAGIC) || - F_TYPE_EQUAL(sfs.f_type, NCP_SUPER_MAGIC) || - F_TYPE_EQUAL(sfs.f_type, NFS_SUPER_MAGIC) || - F_TYPE_EQUAL(sfs.f_type, SMB_SUPER_MAGIC); + j->on_network = fd_is_network_fs(fd) > 0; /* a negative errno from fstatfs() must not read as "on network" */ } static bool file_has_type_prefix(const char *prefix, const char *filename) { @@ -1271,8 +1262,16 @@ static int add_any_file(sd_journal *j, int fd, const char *path) { assert(j); assert(fd >= 0 || path); - if (path && ordered_hashmap_get(j->files, path)) - return 0; + if (path) { + f = ordered_hashmap_get(j->files, path); + if (f) { + /* Mark this file as seen in this generation. 
This is used to GC old files in + * process_q_overflow() to detect journal files that are still and discern them from those who + * are gone. */ + f->last_seen_generation = j->generation; + return 0; + } + } if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) { log_debug("Too many open journal files, not adding %s.", path); @@ -1311,6 +1310,8 @@ static int add_any_file(sd_journal *j, int fd, const char *path) { goto fail; } + f->last_seen_generation = j->generation; + if (!j->has_runtime_files && path_has_prefix(j, f->path, "/run")) j->has_runtime_files = true; else if (!j->has_persistent_files && path_has_prefix(j, f->path, "/var")) @@ -1413,10 +1414,101 @@ static int dirname_is_machine_id(const char *fn) { return sd_id128_equal(id, machine); } +static bool dirent_is_journal_file(const struct dirent *de) { + assert(de); + + if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN)) + return false; + + return endswith(de->d_name, ".journal") || + endswith(de->d_name, ".journal~"); +} + +static bool dirent_is_id128_subdir(const struct dirent *de) { + assert(de); + + if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN)) + return false; + + return id128_is_valid(de->d_name); +} + +static int directory_open(sd_journal *j, const char *path, DIR **ret) { + DIR *d; + + assert(j); + assert(path); + assert(ret); + + if (j->toplevel_fd < 0) + d = opendir(path); + else + /* Open the specified directory relative to the toplevel fd. 
Enforce that the path specified is + * relative, by dropping the initial slash */ + d = xopendirat(j->toplevel_fd, skip_slash(path), 0); + if (!d) + return -errno; + + *ret = d; + return 0; +} + +static int add_directory(sd_journal *j, const char *prefix, const char *dirname); + +static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) { + struct dirent *de; + + assert(j); + assert(m); + assert(d); + + FOREACH_DIRENT_ALL(de, d, goto fail) { + if (dirent_is_journal_file(de)) + (void) add_file(j, m->path, de->d_name); + + if (m->is_root && dirent_is_id128_subdir(de)) + (void) add_directory(j, m->path, de->d_name); + } + + return; + +fail: + log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path); +} + +static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) { + int r; + + assert(j); + assert(m); + assert(fd >= 0); + + /* Watch this directory if that's enabled and if it not being watched yet. */ + + if (m->wd > 0) /* Already have a watch? */ + return; + if (j->inotify_fd < 0) /* Not watching at all? 
*/ + return; + + m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask); + if (m->wd < 0) { + log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path); + return; + } + + r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m); + if (r == -EEXIST) + log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path); + if (r < 0) { + log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path); + (void) inotify_rm_watch(j->inotify_fd, m->wd); + m->wd = -1; + } +} + static int add_directory(sd_journal *j, const char *prefix, const char *dirname) { _cleanup_free_ char *path = NULL; _cleanup_closedir_ DIR *d = NULL; - struct dirent *de = NULL; Directory *m; int r, k; @@ -1435,22 +1527,16 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) goto fail; } - log_debug("Considering directory %s.", path); + log_debug("Considering directory '%s'.", path); /* We consider everything local that is in a directory for the local machine ID, or that is stored in /run */ if ((j->flags & SD_JOURNAL_LOCAL_ONLY) && !((dirname && dirname_is_machine_id(dirname) > 0) || path_has_prefix(j, path, "/run"))) - return 0; + return 0; - - if (j->toplevel_fd < 0) - d = opendir(path); - else - /* Open the specified directory relative to the toplevel fd. 
Enforce that the path specified is - * relative, by dropping the initial slash */ - d = xopendirat(j->toplevel_fd, skip_slash(path), 0); - if (!d) { - r = log_debug_errno(errno, "Failed to open directory %s: %m", path); + r = directory_open(j, path, &d); + if (r < 0) { + log_debug_errno(r, "Failed to open directory '%s': %m", path); goto fail; } @@ -1477,26 +1563,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname) log_debug("Directory %s added.", m->path); } else if (m->is_root) - return 0; + return 0; /* Don't 'downgrade' from root directory */ - if (m->wd <= 0 && j->inotify_fd >= 0) { - /* Watch this directory, if it not being watched yet. */ + m->last_seen_generation = j->generation; - m->wd = inotify_add_watch_fd(j->inotify_fd, dirfd(d), - IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| - IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| - IN_ONLYDIR); + directory_watch(j, m, dirfd(d), + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM| + IN_ONLYDIR); - if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0) - inotify_rm_watch(j->inotify_fd, m->wd); - } - - FOREACH_DIRENT_ALL(de, d, r = log_debug_errno(errno, "Failed to read directory %s: %m", m->path); goto fail) { - - if (dirent_is_file_with_suffix(de, ".journal") || - dirent_is_file_with_suffix(de, ".journal~")) - (void) add_file(j, m->path, de->d_name); - } + if (!j->no_new_files) + directory_enumerate(j, m, d); check_network(j, dirfd(d)); @@ -1513,7 +1590,6 @@ fail: static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { _cleanup_closedir_ DIR *d = NULL; - struct dirent *de; Directory *m; int r, k; @@ -1526,6 +1602,8 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { if (p) { /* If there's a path specified, use it. 
*/ + log_debug("Considering root directory '%s'.", p); + if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) && !path_has_prefix(j, p, "/run")) return -EINVAL; @@ -1533,16 +1611,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { if (j->prefix) p = strjoina(j->prefix, p); - if (j->toplevel_fd < 0) - d = opendir(p); - else - d = xopendirat(j->toplevel_fd, skip_slash(p), 0); - - if (!d) { - if (errno == ENOENT && missing_ok) - return 0; - - r = log_debug_errno(errno, "Failed to open root directory %s: %m", p); + r = directory_open(j, p, &d); + if (r == -ENOENT && missing_ok) + return 0; + if (r < 0) { + log_debug_errno(r, "Failed to open root directory %s: %m", p); goto fail; } } else { @@ -1600,29 +1673,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) { } else if (!m->is_root) return 0; - if (m->wd <= 0 && j->inotify_fd >= 0) { + directory_watch(j, m, dirfd(d), + IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| + IN_ONLYDIR); - m->wd = inotify_add_watch_fd(j->inotify_fd, dirfd(d), - IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE| - IN_ONLYDIR); - - if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0) - inotify_rm_watch(j->inotify_fd, m->wd); - } - - if (j->no_new_files) - return 0; - - FOREACH_DIRENT_ALL(de, d, r = log_debug_errno(errno, "Failed to read directory %s: %m", m->path); goto fail) { - sd_id128_t id; - - if (dirent_is_file_with_suffix(de, ".journal") || - dirent_is_file_with_suffix(de, ".journal~")) - (void) add_file(j, m->path, de->d_name); - else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) && - sd_id128_from_string(de->d_name, &id) >= 0) - (void) add_directory(j, m->path, de->d_name); - } + if (!j->no_new_files) + directory_enumerate(j, m, d); check_network(j, dirfd(d)); @@ -1742,12 +1798,12 @@ static sd_journal *journal_new(int flags, const char *path) { j->path = t; } - j->files = ordered_hashmap_new(&string_hash_ops); + j->files = 
ordered_hashmap_new(&path_hash_ops); if (!j->files) goto fail; j->files_cache = ordered_hashmap_iterated_cache_new(j->files); - j->directories_by_path = hashmap_new(&string_hash_ops); + j->directories_by_path = hashmap_new(&path_hash_ops); j->mmap = mmap_cache_new(); if (!j->files_cache || !j->directories_by_path || !j->mmap) goto fail; @@ -2297,6 +2353,24 @@ _public_ void sd_journal_restart_data(sd_journal *j) { j->current_field = 0; } +static int reiterate_all_paths(sd_journal *j) { + assert(j); + + if (j->no_new_files) + return add_current_paths(j); + + if (j->flags & SD_JOURNAL_OS_ROOT) + return add_search_paths(j); + + if (j->toplevel_fd >= 0) + return add_root_directory(j, NULL, false); + + if (j->path) + return add_root_directory(j, j->path, true); + + return add_search_paths(j); +} + _public_ int sd_journal_get_fd(sd_journal *j) { int r; @@ -2313,20 +2387,10 @@ _public_ int sd_journal_get_fd(sd_journal *j) { if (r < 0) return r; - log_debug("Reiterating files to get inotify watches established"); + log_debug("Reiterating files to get inotify watches established."); - /* Iterate through all dirs again, to add them to the - * inotify */ - if (j->no_new_files) - r = add_current_paths(j); - else if (j->flags & SD_JOURNAL_OS_ROOT) - r = add_search_paths(j); - else if (j->toplevel_fd >= 0) - r = add_root_directory(j, NULL, false); - else if (j->path) - r = add_root_directory(j, j->path, true); - else - r = add_search_paths(j); + /* Iterate through all dirs again, to add them to the inotify */ + r = reiterate_all_paths(j); if (r < 0) return r; @@ -2369,17 +2433,61 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) { return 1; } +static void process_q_overflow(sd_journal *j) { + JournalFile *f; + Directory *m; + Iterator i; + + assert(j); + + /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list + * back in sync with what's on disk. For this we pick a new generation counter value. 
It'll be assigned to all + * journal files we encounter. All journal files and all directories that don't carry it after reenumeration + * are subject to unloading. */ + + log_debug("Inotify queue overrun, reiterating everything."); + + j->generation++; + (void) reiterate_all_paths(j); + + ORDERED_HASHMAP_FOREACH(f, j->files, i) { + + if (f->last_seen_generation == j->generation) + continue; + + log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path); + remove_file_real(j, f); + } + + HASHMAP_FOREACH(m, j->directories_by_path, i) { + + if (m->last_seen_generation == j->generation) + continue; + + if (m->is_root) /* Never GC root directories */ + continue; + + log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m->path); + remove_directory(j, m); + } + + log_debug("Reiteration complete."); +} + static void process_inotify_event(sd_journal *j, struct inotify_event *e) { Directory *d; assert(j); assert(e); + if (e->mask & IN_Q_OVERFLOW) { + process_q_overflow(j); + return; + } + /* Is this a subdirectory we watch? 
*/ d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd)); if (d) { - sd_id128_t id; - if (!(e->mask & IN_ISDIR) && e->len > 0 && (endswith(e->name, ".journal") || endswith(e->name, ".journal~"))) { @@ -2398,7 +2506,7 @@ static void process_inotify_event(sd_journal *j, struct inotify_event *e) { if (e->mask & (IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT)) remove_directory(j, d); - } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && sd_id128_from_string(e->name, &id) >= 0) { + } else if (d->is_root && (e->mask & IN_ISDIR) && e->len > 0 && id128_is_valid(e->name)) { /* Event for root directory */ @@ -2412,7 +2520,7 @@ static void process_inotify_event(sd_journal *j, struct inotify_event *e) { if (e->mask & IN_IGNORED) return; - log_debug("Unknown inotify event."); + log_debug("Unexpected inotify event."); } static int determine_change(sd_journal *j) { @@ -2432,6 +2540,9 @@ _public_ int sd_journal_process(sd_journal *j) { assert_return(j, -EINVAL); assert_return(!journal_pid_changed(j), -ECHILD); + if (j->inotify_fd < 0) /* We have no inotify fd yet? Then there's nothing to process. 
*/ + return 0; + j->last_process_usec = now(CLOCK_MONOTONIC); j->last_invalidate_counter = j->current_invalidate_counter; diff --git a/src/login/logind-acl.c b/src/login/logind-acl.c index d785f67ca31..3f355e7ea80 100644 --- a/src/login/logind-acl.c +++ b/src/login/logind-acl.c @@ -192,7 +192,7 @@ int devnode_acl_all(struct udev *udev, assert(udev); - nodes = set_new(&string_hash_ops); + nodes = set_new(&path_hash_ops); if (!nodes) return -ENOMEM; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index bc77c3abdb9..78b9b695577 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2369,7 +2369,7 @@ int unit_show_processes( if (r < 0) return r; - cgroups = hashmap_new(&string_hash_ops); + cgroups = hashmap_new(&path_hash_ops); if (!cgroups) return -ENOMEM; diff --git a/src/shared/install.c b/src/shared/install.c index 026aa323028..fdce447c89e 100644 --- a/src/shared/install.c +++ b/src/shared/install.c @@ -522,7 +522,7 @@ static int mark_symlink_for_removal( assert(p); - r = set_ensure_allocated(remove_symlinks_to, &string_hash_ops); + r = set_ensure_allocated(remove_symlinks_to, &path_hash_ops); if (r < 0) return r; diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index a1dc95b2bb2..d97656d6fec 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -110,19 +110,19 @@ static int parse_file(OrderedHashmap *sysctl_options, const char *path, bool ign _cleanup_free_ char *l = NULL; void *v; int k; + k = read_line(f, LONG_LINE_MAX, &l); if (k == 0) break; - if (k < 0) return log_error_errno(k, "Failed to read file '%s', ignoring: %m", path); c++; p = strstrip(l); - if (!*p) - continue; + if (isempty(p)) + continue; if (strchr(COMMENTS "\n", *p)) continue; @@ -261,7 +261,7 @@ int main(int argc, char *argv[]) { umask(0022); - sysctl_options = ordered_hashmap_new(&string_hash_ops); + sysctl_options = ordered_hashmap_new(&path_hash_ops); if (!sysctl_options) { r = log_oom(); goto finish; diff --git 
a/src/test/test-fs-util.c b/src/test/test-fs-util.c index 9f3a500080c..184a2a52c2c 100644 --- a/src/test/test-fs-util.c +++ b/src/test/test-fs-util.c @@ -527,6 +527,31 @@ static void test_touch_file(void) { assert_se(timespec_load(&st.st_mtim) == test_mtime); } +static void test_unlinkat_deallocate(void) { + _cleanup_free_ char *p = NULL; + _cleanup_close_ int fd = -1; + struct stat st; + + assert_se(tempfn_random_child(NULL, "unlink-deallocation", &p) >= 0); + + fd = open(p, O_WRONLY|O_CLOEXEC|O_CREAT|O_EXCL, 0600); + assert_se(fd >= 0); + + assert_se(write(fd, "hallo\n", 6) == 6); + + assert_se(fstat(fd, &st) >= 0); + assert_se(st.st_size == 6); + assert_se(st.st_blocks > 0); + assert_se(st.st_nlink == 1); + + assert_se(unlinkat_deallocate(AT_FDCWD, p, 0) >= 0); + + assert_se(fstat(fd, &st) >= 0); + assert_se(IN_SET(st.st_size, 0, 6)); /* depending on whether hole punching worked the size will be 6 (it worked) or 0 (we had to resort to truncation) */ + assert_se(st.st_blocks == 0); + assert_se(st.st_nlink == 0); +} + int main(int argc, char *argv[]) { test_unlink_noerrno(); test_get_files_in_directory(); @@ -536,6 +561,7 @@ int main(int argc, char *argv[]) { test_dot_or_dot_dot(); test_access_fd(); test_touch_file(); + test_unlinkat_deallocate(); return 0; } diff --git a/src/test/test-hashmap.c b/src/test/test-hashmap.c index 16ca27cd5fd..ad0b7390e9b 100644 --- a/src/test/test-hashmap.c +++ b/src/test/test-hashmap.c @@ -137,6 +137,34 @@ static void test_iterated_cache(void) { assert_se(iterated_cache_free(c) == NULL); } +static void test_path_hashmap(void) { + _cleanup_(hashmap_freep) Hashmap *h = NULL; + + assert_se(h = hashmap_new(&path_hash_ops)); + + assert_se(hashmap_put(h, "foo", INT_TO_PTR(1)) >= 0); + assert_se(hashmap_put(h, "/foo", INT_TO_PTR(2)) >= 0); + assert_se(hashmap_put(h, "//foo", INT_TO_PTR(3)) == -EEXIST); + assert_se(hashmap_put(h, "//foox/", INT_TO_PTR(4)) >= 0); + assert_se(hashmap_put(h, "/foox////", INT_TO_PTR(5)) == -EEXIST); + 
assert_se(hashmap_put(h, "foo//////bar/quux//", INT_TO_PTR(6)) >= 0); + assert_se(hashmap_put(h, "foo/bar//quux/", INT_TO_PTR(8)) == -EEXIST); + + assert_se(hashmap_get(h, "foo") == INT_TO_PTR(1)); + assert_se(hashmap_get(h, "foo/") == INT_TO_PTR(1)); + assert_se(hashmap_get(h, "foo////") == INT_TO_PTR(1)); + assert_se(hashmap_get(h, "/foo") == INT_TO_PTR(2)); + assert_se(hashmap_get(h, "//foo") == INT_TO_PTR(2)); + assert_se(hashmap_get(h, "/////foo////") == INT_TO_PTR(2)); + assert_se(hashmap_get(h, "/////foox////") == INT_TO_PTR(4)); + assert_se(hashmap_get(h, "/foox/") == INT_TO_PTR(4)); + assert_se(hashmap_get(h, "/foox") == INT_TO_PTR(4)); + assert_se(!hashmap_get(h, "foox")); + assert_se(hashmap_get(h, "foo/bar/quux") == INT_TO_PTR(6)); + assert_se(hashmap_get(h, "foo////bar////quux/////") == INT_TO_PTR(6)); + assert_se(!hashmap_get(h, "/foo////bar////quux/////")); +} + int main(int argc, const char *argv[]) { test_hashmap_funcs(); test_ordered_hashmap_funcs(); @@ -147,4 +175,7 @@ int main(int argc, const char *argv[]) { test_trivial_compare_func(); test_string_compare_func(); test_iterated_cache(); + test_path_hashmap(); + + return 0; } diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c index 4b357465a2c..98368c36f13 100644 --- a/src/tmpfiles/tmpfiles.c +++ b/src/tmpfiles/tmpfiles.c @@ -383,9 +383,11 @@ static void load_unix_sockets(void) { /* We maintain a cache of the sockets we found in /proc/net/unix to speed things up a little. */ - unix_sockets = set_new(&string_hash_ops); - if (!unix_sockets) + unix_sockets = set_new(&path_hash_ops); + if (!unix_sockets) { + log_oom(); return; + } f = fopen("/proc/net/unix", "re"); if (!f) {