From eb6ba873447e378d40a13dfda764d2772ce36389 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Mon, 25 Sep 2023 10:44:50 +0900 Subject: [PATCH 1/7] sd-journal: drop redundant re-read of entry array object This effectively reverts d9b61db922404a216de018cc5ddff9b69bcaf1db. In the do-while loop, we do not read any other entry array object, hence the current object is always in the mmap cache and not necessary to re-read it. --- src/libsystemd/sd-journal/journal-file.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 5d3f85e4cc3..31d4db97c0c 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2840,10 +2840,6 @@ static int generic_array_get( * disk properly, let's see if the next one might work for us instead. */ log_debug_errno(r, "Entry item %" PRIu64 " is bad, skipping over it.", i); - r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); - if (r < 0) - return r; - } while (bump_array_index(&i, direction, k) > 0); r = bump_entry_array(f, o, a, first, direction, &a); From b7264911aa91ce1aede9da621cb8aa7eb0f88c2b Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 27 Sep 2023 01:14:58 +0900 Subject: [PATCH 2/7] sd-journal: make bump_entry_array() return positive when a valid offset found When it returns 0 offset, then the subsequent journal_file_move_to_object() will fail. Let's return generic_array_get() earlier in such situation. --- src/libsystemd/sd-journal/journal-file.c | 61 ++++++++++++------------ 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 31d4db97c0c..22de80bac0f 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2698,47 +2698,48 @@ static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) { static int bump_entry_array( JournalFile *f, - Object *o, - uint64_t offset, - uint64_t first, + Object *o, /* the current entry array object. */ + uint64_t offset, /* the offset of the entry array object. */ + uint64_t first, /* The offset of the first entry array object in the chain. */ direction_t direction, uint64_t *ret) { - uint64_t p, q = 0; int r; assert(f); - assert(offset); assert(ret); if (direction == DIRECTION_DOWN) { assert(o); + assert(o->object.type == OBJECT_ENTRY_ARRAY); + *ret = le64toh(o->entry_array.next_entry_array_offset); - return 0; + } else { + + /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have + * to start iterating from the top. */ + + assert(offset > 0); + + uint64_t p = first, q = 0; + while (p > 0 && p != offset) { + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o); + if (r < 0) + return r; + + q = p; + p = le64toh(o->entry_array.next_entry_array_offset); + } + + /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a + * corrupted journal file. */ + if (p == 0) + return -EBADMSG; + + *ret = q; } - /* Entry array chains are a singly linked list, so to find the previous array in the chain, we have - * to start iterating from the top. */ - - p = first; - - while (p > 0 && p != offset) { - r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, p, &o); - if (r < 0) - return r; - - q = p; - p = le64toh(o->entry_array.next_entry_array_offset); - } - - /* If we can't find the previous entry array in the entry array chain, we're likely dealing with a - * corrupted journal file. */ - if (p == 0) - return -EBADMSG; - - *ret = q; - - return 0; + return *ret > 0; } static int generic_array_get( @@ -2781,7 +2782,7 @@ static int generic_array_get( * array and start iterating entries from there. */ r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a); - if (r < 0) + if (r <= 0) return r; i = UINT64_MAX; @@ -2843,7 +2844,7 @@ static int generic_array_get( } while (bump_array_index(&i, direction, k) > 0); r = bump_entry_array(f, o, a, first, direction, &a); - if (r < 0) + if (r <= 0) return r; t += k; From b63f09e4ee5ccfbb764e996154d78953093d9132 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 27 Sep 2023 01:29:11 +0900 Subject: [PATCH 3/7] sd-journal: merge two bump_entry_array() calls No functional changes, just refactoring. --- src/libsystemd/sd-journal/journal-file.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 22de80bac0f..5371c6d79bd 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2752,7 +2752,7 @@ static int generic_array_get( uint64_t a, t = 0, k; ChainCacheItem *ci; - Object *o; + Object *o = NULL; int r; assert(f); @@ -2781,12 +2781,7 @@ static int generic_array_get( /* If there's corruption and we're going upwards, move back to the previous entry * array and start iterating entries from there. */ - r = bump_entry_array(f, NULL, a, first, DIRECTION_UP, &a); - if (r <= 0) - return r; - i = UINT64_MAX; - break; } if (r < 0) @@ -2808,6 +2803,10 @@ static int generic_array_get( /* In the first iteration of the while loop, we reuse i, k and o from the previous while * loop. */ if (i == UINT64_MAX) { + r = bump_entry_array(f, o, a, first, direction, &a); + if (r <= 0) + return r; + r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); if (r < 0) return r; @@ -2843,10 +2842,6 @@ static int generic_array_get( } while (bump_array_index(&i, direction, k) > 0); - r = bump_entry_array(f, o, a, first, direction, &a); - if (r <= 0) - return r; - t += k; i = UINT64_MAX; } From fe6f2bd8a64108ab9d42ee4633f1cab2e05f2915 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Mon, 25 Sep 2023 11:10:01 +0900 Subject: [PATCH 4/7] sd-journal: fix calculation of number of 'total' entries in the chained arrays If there's corruption and we are going upwards, then the 'total' must be decreased when we go to the previous array. However, previously, we wrongly kept or increased the number. This fixes the behavior. --- src/libsystemd/sd-journal/journal-file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 5371c6d79bd..474d7b1d27d 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2815,7 +2815,16 @@ static int generic_array_get( if (k == 0) break; - i = direction == DIRECTION_DOWN ? 0 : k - 1; + if (direction == DIRECTION_DOWN) + i = 0; + else { + /* We moved to the previous array. The total must be decreased. */ + if (t < k) + return -EBADMSG; /* chain cache is broken ? */ + + i = k - 1; + t -= k; + } } do { @@ -2842,7 +2851,10 @@ static int generic_array_get( } while (bump_array_index(&i, direction, k) > 0); - t += k; + if (direction == DIRECTION_DOWN) + /* We are going to the next array, the total must be incremented. */ + t += k; + i = UINT64_MAX; } From f85e79d3e57c0639f583d1e27c649ee561075d62 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 27 Sep 2023 01:35:31 +0900 Subject: [PATCH 5/7] sd-journal: add missing 'error' handling When we reach an empty array, there are at least two possibilities: - journal file is corrupted, - invalid index is requested. We cannot distinguish them here, let's simply return earlier. --- src/libsystemd/sd-journal/journal-file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 474d7b1d27d..99528680163 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2788,6 +2788,9 @@ static int generic_array_get( return r; k = journal_file_entry_array_n_items(f, o); + if (k == 0) + return 0; + if (i < k) break; From 3a23e41883822671254a046d3b007b5afc9a6c7a Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Thu, 28 Sep 2023 02:12:00 +0900 Subject: [PATCH 6/7] sd-journal: add/update comments --- src/libsystemd/sd-journal/journal-file.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 99528680163..e34ca360d9b 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2628,11 +2628,12 @@ int journal_file_append_entry( } typedef struct ChainCacheItem { - uint64_t first; /* the array at the beginning of the chain */ - uint64_t array; /* the cached array */ - uint64_t begin; /* the first item in the cached array */ - uint64_t total; /* the total number of items in all arrays before this one in the chain */ - uint64_t last_index; /* the last index we looked at, to optimize locality when bisecting */ + uint64_t first; /* The offset of the entry array object at the beginning of the chain, + * i.e., le64toh(f->header->entry_array_offset), or le64toh(o->data.entry_offset). */ + uint64_t array; /* The offset of the cached entry array object. */ + uint64_t begin; /* The offset of the first item in the cached array. */ + uint64_t total; /* The total number of items in all arrays before the cached one in the chain. */ + uint64_t last_index; /* The last index we looked at in the cached array, to optimize locality when bisecting. */ } ChainCacheItem; static void chain_cache_put( @@ -2744,11 +2745,11 @@ static int bump_entry_array( static int generic_array_get( JournalFile *f, - uint64_t first, - uint64_t i, + uint64_t first, /* The offset of the first entry array object in the chain. */ + uint64_t i, /* The index of the target object counted from the beginning of the entry array chain. */ direction_t direction, - Object **ret_object, - uint64_t *ret_offset) { + Object **ret_object, /* The found object. */ + uint64_t *ret_offset) { /* The offset of the found object. */ uint64_t a, t = 0, k; ChainCacheItem *ci; @@ -2794,6 +2795,7 @@ static int generic_array_get( if (i < k) break; + /* The index is larger than the number of elements in the array. Let's move to the next array. */ i -= k; t += k; a = le64toh(o->entry_array.next_entry_array_offset); @@ -2803,8 +2805,6 @@ static int generic_array_get( * direction). */ while (a > 0) { - /* In the first iteration of the while loop, we reuse i, k and o from the previous while - * loop. */ if (i == UINT64_MAX) { r = bump_entry_array(f, o, a, first, direction, &a); if (r <= 0) @@ -2854,6 +2854,8 @@ static int generic_array_get( } while (bump_array_index(&i, direction, k) > 0); + /* All entries tried in the above do-while loop are broken. Let's move to the next (or previous) array. */ + if (direction == DIRECTION_DOWN) /* We are going to the next array, the total must be incremented. */ t += k; From d37eeabc4febf0340c32e01afb5c0cab2224bd98 Mon Sep 17 00:00:00 2001 From: Yu Watanabe Date: Wed, 27 Sep 2023 14:52:21 +0900 Subject: [PATCH 7/7] sd-journal: merge journal_file_next_entry_for_data() with generic_array_get_plus_one() Because journal_file_next_entry_for_data() provides the first entry, while journal_file_next_entry() actually provides the next entry of the input, this also renames it to journal_file_move_to_entry_for_data(). Also, previously, on DIRECTION_UP the function did not fall back to the 'extra' entry when all entries linked in the chained array are broken. This also fixes the issue, and now it fall back to the extra entry. --- src/journal/test-journal.c | 8 +- src/libsystemd/sd-journal/journal-file.c | 96 +++++++++++------------- src/libsystemd/sd-journal/journal-file.h | 4 +- src/libsystemd/sd-journal/sd-journal.c | 6 +- 4 files changed, 53 insertions(+), 61 deletions(-) diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c index cd295b2a7e5..b227b9be6cd 100644 --- a/src/journal/test-journal.c +++ b/src/journal/test-journal.c @@ -74,17 +74,17 @@ static void test_non_empty_one(void) { assert_se(le64toh(o->entry.seqnum) == 1); assert_se(journal_file_find_data_object(f->file, test, strlen(test), &d, NULL) == 1); - assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); assert_se(le64toh(o->entry.seqnum) == 1); - assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); assert_se(le64toh(o->entry.seqnum) == 3); assert_se(journal_file_find_data_object(f->file, test2, strlen(test2), &d, NULL) == 1); - assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f->file, d, DIRECTION_UP, &o, NULL) == 1); assert_se(le64toh(o->entry.seqnum) == 2); - assert_se(journal_file_next_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); + assert_se(journal_file_move_to_entry_for_data(f->file, d, DIRECTION_DOWN, &o, NULL) == 1); assert_se(le64toh(o->entry.seqnum) == 2); assert_se(journal_file_find_data_object(f->file, "quux", 4, &d, NULL) == 0); diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index e34ca360d9b..1b80afe74dc 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -2866,37 +2866,6 @@ static int generic_array_get( return 0; } -static int generic_array_get_plus_one( - JournalFile *f, - uint64_t extra, - uint64_t first, - uint64_t i, - direction_t direction, - Object **ret_object, - uint64_t *ret_offset) { - - int r; - - assert(f); - - /* FIXME: fix return value assignment on success. */ - - if (i == 0) { - r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); - if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) - return generic_array_get(f, first, 0, direction, ret_object, ret_offset); - if (r < 0) - return r; - - if (ret_offset) - *ret_offset = extra; - - return 1; - } - - return generic_array_get(f, first, i - 1, direction, ret_object, ret_offset); -} - enum { TEST_FOUND, TEST_LEFT, @@ -3470,39 +3439,67 @@ int journal_file_next_entry( return 1; } -int journal_file_next_entry_for_data( +int journal_file_move_to_entry_for_data( JournalFile *f, Object *d, direction_t direction, Object **ret_object, uint64_t *ret_offset) { - uint64_t i, n, ofs; - int r; + uint64_t extra, first, n; + int r = 0; assert(f); assert(d); assert(d->object.type == OBJECT_DATA); + assert(IN_SET(direction, DIRECTION_DOWN, DIRECTION_UP)); /* FIXME: fix return value assignment. */ - n = le64toh(READ_NOW(d->data.n_entries)); + /* This returns the first (when the direction is down, otherwise the last) entry linked to the + * specified data object. */ + + n = le64toh(d->data.n_entries); if (n <= 0) - return n; + return 0; + n--; /* n_entries is the number of entries linked to the data object, including the 'extra' entry. */ - i = direction == DIRECTION_DOWN ? 0 : n - 1; + extra = le64toh(d->data.entry_offset); + first = le64toh(d->data.entry_array_offset); - r = generic_array_get_plus_one(f, - le64toh(d->data.entry_offset), - le64toh(d->data.entry_array_offset), - i, - direction, - ret_object, &ofs); - if (r <= 0) - return r; + if (direction == DIRECTION_DOWN && extra > 0) { + /* When we are going downwards, first try to read the extra entry. */ + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); + if (r >= 0) + goto use_extra; + if (!IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) + return r; + } + if (n > 0) { + /* DIRECTION_DOWN : The extra entry is broken, falling back to the entries in the array. + * DIRECTION_UP : Try to find a valid entry in the array from the tail. */ + r = generic_array_get(f, + first, + direction == DIRECTION_DOWN ? 0 : n - 1, + direction, + ret_object, ret_offset); + if (!IN_SET(r, 0, -EADDRNOTAVAIL, -EBADMSG)) + return r; /* found or critical error. */ + } + + if (direction == DIRECTION_UP && extra > 0) { + /* No valid entry exists in the chained array, falling back to the extra entry. */ + r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, ret_object); + if (r >= 0) + goto use_extra; + } + + return r; + +use_extra: if (ret_offset) - *ret_offset = ofs; + *ret_offset = extra; return 1; } @@ -4464,12 +4461,7 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u if (r < 0) return r; - r = generic_array_get_plus_one(f, - le64toh(o->data.entry_offset), - le64toh(o->data.entry_array_offset), - le64toh(o->data.n_entries) - 1, - DIRECTION_UP, - &o, NULL); + r = journal_file_move_to_entry_for_data(f, o, DIRECTION_UP, &o, NULL); if (r <= 0) return r; diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index b018fd8788e..6c46dff4c45 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -286,13 +286,13 @@ void journal_file_reset_location(JournalFile *f); void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset); int journal_file_next_entry(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); -int journal_file_next_entry_for_data(JournalFile *f, Object *d, direction_t direction, Object **ret_object, uint64_t *ret_offset); - int journal_file_move_to_entry_by_offset(JournalFile *f, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); int journal_file_move_to_entry_by_seqnum(JournalFile *f, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset); int journal_file_move_to_entry_by_realtime(JournalFile *f, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset); int journal_file_move_to_entry_by_monotonic(JournalFile *f, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset); +int journal_file_move_to_entry_for_data(JournalFile *f, Object *d, direction_t direction, Object **ret_object, uint64_t *ret_offset); + int journal_file_move_to_entry_by_offset_for_data(JournalFile *f, Object *d, uint64_t p, direction_t direction, Object **ret_object, uint64_t *ret_offset); int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uint64_t seqnum, direction_t direction, Object **ret_object, uint64_t *ret_offset); int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset); diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c index 1b9dcbdecb3..3b8ca21725c 100644 --- a/src/libsystemd/sd-journal/sd-journal.c +++ b/src/libsystemd/sd-journal/sd-journal.c @@ -660,9 +660,9 @@ static int find_location_for_match( /* FIXME: missing: find by monotonic */ if (j->current_location.type == LOCATION_HEAD) - return direction == DIRECTION_DOWN ? journal_file_next_entry_for_data(f, d, DIRECTION_DOWN, ret, offset) : 0; + return direction == DIRECTION_DOWN ? journal_file_move_to_entry_for_data(f, d, DIRECTION_DOWN, ret, offset) : 0; if (j->current_location.type == LOCATION_TAIL) - return direction == DIRECTION_UP ? journal_file_next_entry_for_data(f, d, DIRECTION_UP, ret, offset) : 0; + return direction == DIRECTION_UP ? journal_file_move_to_entry_for_data(f, d, DIRECTION_UP, ret, offset) : 0; if (j->current_location.seqnum_set && sd_id128_equal(j->current_location.seqnum_id, f->header->seqnum_id)) return journal_file_move_to_entry_by_seqnum_for_data(f, d, j->current_location.seqnum, direction, ret, offset); if (j->current_location.monotonic_set) { @@ -678,7 +678,7 @@ static int find_location_for_match( if (j->current_location.realtime_set) return journal_file_move_to_entry_by_realtime_for_data(f, d, j->current_location.realtime, direction, ret, offset); - return journal_file_next_entry_for_data(f, d, direction, ret, offset); + return journal_file_move_to_entry_for_data(f, d, direction, ret, offset); } else if (m->type == MATCH_OR_TERM) { uint64_t np = 0;