From e81710d3d08886f8957bdbdb6746017ff0538818 Mon Sep 17 00:00:00 2001 From: Daan De Meyer Date: Fri, 21 Jan 2022 18:29:41 +0000 Subject: [PATCH] journal: Store offsets to tail entry array objects in chain Previously, we'd iterate an entry array chain from start to end every time we added an entry offset to it. To speed up this operation, we cache the last entry array in the chain and how many items it contains. This allows the addition of an entry to the chain to be done in constant time instead of linear time as we don't have to iterate the entire chain anymore every time we add an entry. --- docs/JOURNAL_FILE_FORMAT.md | 19 +++- .../sd-journal/journal-authenticate.c | 2 +- src/libsystemd/sd-journal/journal-def.h | 18 ++- src/libsystemd/sd-journal/journal-file.c | 107 +++++++++++------- src/libsystemd/sd-journal/journal-file.h | 10 ++ src/libsystemd/sd-journal/journal-verify.c | 8 +- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/docs/JOURNAL_FILE_FORMAT.md b/docs/JOURNAL_FILE_FORMAT.md index 5f7f97c1b80..2d0debd858c 100644 --- a/docs/JOURNAL_FILE_FORMAT.md +++ b/docs/JOURNAL_FILE_FORMAT.md @@ -177,6 +177,9 @@ _packed_ struct Header { /* Added in 246 */ le64_t data_hash_chain_depth; le64_t field_hash_chain_depth; + /* Added in 252 */ + le32_t tail_entry_array_offset; + le32_t tail_entry_array_n_entries; }; ``` @@ -231,6 +234,8 @@ became too frequent. Similar, **field_hash_chain_depth** is a counter of the deepest chain in the field hash table, minus one. +**tail_entry_array_offset** and **tail_entry_array_n_entries** allow immediate +access to the last entry array in the global entry array chain.
## Extensibility @@ -397,7 +402,16 @@ _packed_ struct DataObject { le64_t entry_offset; /* the first array entry we store inline */ le64_t entry_array_offset; le64_t n_entries; - uint8_t payload[]; + union { + struct { + uint8_t payload[]; + } regular; + struct { + le32_t tail_entry_array_offset; + le32_t tail_entry_array_n_entries; + uint8_t payload[]; + } compact; + }; }; ``` @@ -430,6 +444,9 @@ OBJECT_COMPRESSED_XZ/OBJECT_COMPRESSED_LZ4/OBJECT_COMPRESSED_ZSTD is set in the `ObjectHeader`, in which case the payload is compressed with the indicated compression algorithm. +If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, two extra fields are stored to +allow immediate access to the tail entry array in the DATA object's entry array +chain. ## Field Objects diff --git a/src/libsystemd/sd-journal/journal-authenticate.c b/src/libsystemd/sd-journal/journal-authenticate.c index 3965f3f589c..1cb89433897 100644 --- a/src/libsystemd/sd-journal/journal-authenticate.c +++ b/src/libsystemd/sd-journal/journal-authenticate.c @@ -248,7 +248,7 @@ int journal_file_hmac_put_object(JournalFile *f, ObjectType type, Object *o, uin case OBJECT_DATA: /* All but hash and payload are mutable */ gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash)); - gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); + gcry_md_write(f->hmac, journal_file_data_payload_field(f, o), le64toh(o->object.size) - journal_file_data_payload_offset(f)); break; case OBJECT_FIELD: diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h index f04a2298c43..8f994b01787 100644 --- a/src/libsystemd/sd-journal/journal-def.h +++ b/src/libsystemd/sd-journal/journal-def.h @@ -65,8 +65,17 @@ struct ObjectHeader { le64_t entry_offset; /* the first array entry we store inline */ \ le64_t entry_array_offset; \ le64_t n_entries; \ - uint8_t payload[]; \ - } + union { \ + struct { \ + uint8_t payload[0]; \ + } regular; \
+ struct { \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ + uint8_t payload[0]; \ + } compact; \ + }; \ +} struct DataObject DataObject__contents; struct DataObject__packed DataObject__contents _packed_; @@ -222,12 +231,15 @@ enum { /* Added in 246 */ \ le64_t data_hash_chain_depth; \ le64_t field_hash_chain_depth; \ + /* Added in 252 */ \ + le32_t tail_entry_array_offset; \ + le32_t tail_entry_array_n_entries; \ } struct Header struct_Header__contents; struct Header__packed struct_Header__contents _packed_; assert_cc(sizeof(struct Header) == sizeof(struct Header__packed)); -assert_cc(sizeof(struct Header) == 256); +assert_cc(sizeof(struct Header) == 264); #define FSS_HEADER_SIGNATURE \ ((const char[]) { 'K', 'S', 'H', 'H', 'R', 'H', 'L', 'P' }) diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index 67bd2305adb..7dbbd4889c4 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -662,7 +662,7 @@ static int journal_file_move_to( return mmap_cache_fd_get(f->cache_fd, type_to_context(type), keep_always, offset, size, &f->last_stat, ret); } -static uint64_t minimum_header_size(Object *o) { +static uint64_t minimum_header_size(JournalFile *f, Object *o) { static const uint64_t table[] = { [OBJECT_DATA] = sizeof(DataObject), @@ -674,15 +674,22 @@ static uint64_t minimum_header_size(Object *o) { [OBJECT_TAG] = sizeof(TagObject), }; + assert(f); + assert(o); + + if (o->object.type == OBJECT_DATA) + return journal_file_data_payload_offset(f); + if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0) return sizeof(ObjectHeader); return table[o->object.type]; } -static int check_object_header(Object *o, ObjectType type, uint64_t offset) { +static int check_object_header(JournalFile *f, Object *o, ObjectType type, uint64_t offset) { uint64_t s; + assert(f); assert(o); s = le64toh(READ_NOW(o->object.size)); @@ -706,7 +713,7 @@ static int 
check_object_header(Object *o, ObjectType type, uint64_t offset) { "Attempt to move to object of unexpected type: %" PRIu64, offset); - if (s < minimum_header_size(o)) + if (s < minimum_header_size(f, o)) return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Attempt to move to truncated object: %" PRIu64, offset); @@ -728,10 +735,10 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) { le64toh(o->data.n_entries), offset); - if (le64toh(o->object.size) <= offsetof(Object, data.payload)) + if (le64toh(o->object.size) <= journal_file_data_payload_offset(f)) return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Bad object size (<= %zu): %" PRIu64 ": %" PRIu64, - offsetof(Object, data.payload), + journal_file_data_payload_offset(f), le64toh(o->object.size), offset); @@ -883,7 +890,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset if (r < 0) return r; - r = check_object_header(o, type, offset); + r = check_object_header(f, o, type, offset); if (r < 0) return r; @@ -891,7 +898,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset if (r < 0) return r; - r = check_object_header(o, type, offset); + r = check_object_header(f, o, type, offset); if (r < 0) return r; @@ -935,11 +942,11 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of "Failed to read short object at offset: %" PRIu64, offset); - r = check_object_header(&o, type, offset); + r = check_object_header(f, &o, type, offset); if (r < 0) return r; - if ((size_t) n < minimum_header_size(&o)) + if ((size_t) n < minimum_header_size(f, &o)) return log_debug_errno(SYNTHETIC_ERRNO(EIO), "Short read while reading object: %" PRIu64, offset); @@ -1541,15 +1548,35 @@ static int journal_file_append_field( return 0; } +static Compression maybe_compress_payload(JournalFile *f, uint8_t *dst, const uint8_t *src, uint64_t size, size_t *rsize) { + Compression compression = COMPRESSION_NONE; + +#if HAVE_COMPRESSION + if 
(JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) { + compression = compress_blob(src, size, dst, size - 1, rsize); + if (compression > 0) { + log_debug("Compressed data object %"PRIu64" -> %zu using %s", + size, *rsize, compression_to_string(compression)); + } else + /* Compression didn't work, we don't really care why, let's continue without compression */ + compression = COMPRESSION_NONE; + } +#endif + + return compression; +} + static int journal_file_append_data( JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *ret_offset) { - uint64_t hash, p, fp, osize; + uint64_t hash, p, osize; Object *o, *fo; - int r, compression = 0; + size_t rsize = 0; + Compression c; const void *eq; + int r; assert(f); @@ -1568,32 +1595,20 @@ static int journal_file_append_data( if (!eq) return -EINVAL; - osize = offsetof(Object, data.payload) + size; + osize = journal_file_data_payload_offset(f) + size; r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p); if (r < 0) return r; o->data.hash = htole64(hash); -#if HAVE_COMPRESSION - if (JOURNAL_FILE_COMPRESS(f) && size >= f->compress_threshold_bytes) { - size_t rsize = 0; + c = maybe_compress_payload(f, journal_file_data_payload_field(f, o), data, size, &rsize); - compression = compress_blob(data, size, o->data.payload, size - 1, &rsize); - if (compression > COMPRESSION_NONE) { - o->object.size = htole64(offsetof(Object, data.payload) + rsize); - o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(compression); - - log_debug("Compressed data object %"PRIu64" -> %zu using %s", - size, rsize, compression_to_string(compression)); - } else - /* Compression didn't work, we don't really care why, let's continue without compression */ - compression = COMPRESSION_NONE; - } -#endif - - if (compression == 0) - memcpy_safe(o->data.payload, data, size); + if (c != COMPRESSION_NONE) { + o->object.size = htole64(journal_file_data_payload_offset(f) + rsize); + o->object.flags |= COMPRESSION_TO_OBJECT_FLAG(c); 
+ } else + memcpy_safe(journal_file_data_payload_field(f, o), data, size); r = journal_file_link_data(f, o, p, hash); if (r < 0) @@ -1611,7 +1626,7 @@ static int journal_file_append_data( #endif /* Create field object ... */ - r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp); + r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, NULL); if (r < 0) return r; @@ -1715,17 +1730,17 @@ int journal_file_data_payload( } size = le64toh(READ_NOW(o->object.size)); - if (size < offsetof(Object, data.payload)) + if (size < journal_file_data_payload_offset(f)) return -EBADMSG; - size -= offsetof(Object, data.payload); + size -= journal_file_data_payload_offset(f); c = COMPRESSION_FROM_OBJECT(o); if (c < 0) return -EPROTONOSUPPORT; - return maybe_decompress_payload(f, o->data.payload, size, c, field, field_length, data_threshold, - ret_data, ret_size); + return maybe_decompress_payload(f, journal_file_data_payload_field(f, o), size, c, field, + field_length, data_threshold, ret_data, ret_size); } uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) { @@ -1788,6 +1803,8 @@ static void write_entry_array_item(JournalFile *f, Object *o, uint64_t i, uint64 static int link_entry_into_array(JournalFile *f, le64_t *first, le64_t *idx, + le32_t *tail, + le32_t *tidx, uint64_t p) { int r; uint64_t n = 0, ap = 0, q, i, a, hidx; @@ -1799,8 +1816,9 @@ static int link_entry_into_array(JournalFile *f, assert(idx); assert(p > 0); - a = le64toh(*first); - i = hidx = le64toh(READ_NOW(*idx)); + a = tail ? le32toh(*tail) : le64toh(*first); + hidx = le64toh(READ_NOW(*idx)); + i = tidx ? 
le32toh(READ_NOW(*tidx)) : hidx; while (a > 0) { r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o); @@ -1811,6 +1829,8 @@ static int link_entry_into_array(JournalFile *f, if (i < n) { write_entry_array_item(f, o, i, p); *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(le32toh(*tidx) + 1); return 0; } @@ -1851,10 +1871,15 @@ static int link_entry_into_array(JournalFile *f, o->entry_array.next_entry_array_offset = htole64(q); } + if (tail) + *tail = htole32(q); + if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays)) f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1); *idx = htole64(hidx + 1); + if (tidx) + *tidx = htole32(1); return 0; } @@ -1863,6 +1888,8 @@ static int link_entry_into_array_plus_one(JournalFile *f, le64_t *extra, le64_t *first, le64_t *idx, + le32_t *tail, + le32_t *tidx, uint64_t p) { uint64_t hidx; @@ -1883,7 +1910,7 @@ static int link_entry_into_array_plus_one(JournalFile *f, le64_t i; i = htole64(hidx - 1); - r = link_entry_into_array(f, first, &i, p); + r = link_entry_into_array(f, first, &i, tail, tidx, p); if (r < 0) return r; } @@ -1907,6 +1934,8 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs &o->data.entry_offset, &o->data.entry_array_offset, &o->data.n_entries, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_offset : NULL, + JOURNAL_HEADER_COMPACT(f->header) ? &o->data.compact.tail_entry_array_n_entries : NULL, offset); } @@ -1933,6 +1962,8 @@ static int journal_file_link_entry( r = link_entry_into_array(f, &f->header->entry_array_offset, &f->header->n_entries, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_offset) ? &f->header->tail_entry_array_offset : NULL, + JOURNAL_HEADER_CONTAINS(f->header, tail_entry_array_n_entries) ? 
&f->header->tail_entry_array_n_entries : NULL, offset); if (r < 0) return r; diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index 79769537937..e5b9765471e 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -223,6 +223,16 @@ int journal_file_data_payload( void **ret_data, size_t *ret_size); +static inline size_t journal_file_data_payload_offset(JournalFile *f) { + return JOURNAL_HEADER_COMPACT(f->header) + ? offsetof(Object, data.compact.payload) + : offsetof(Object, data.regular.payload); +} + +static inline uint8_t* journal_file_data_payload_field(JournalFile *f, Object *o) { + return JOURNAL_HEADER_COMPACT(f->header) ? o->data.compact.payload : o->data.regular.payload; +} + uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_; static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) { diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c index 37d2a656b21..8b2c468a0b7 100644 --- a/src/libsystemd/sd-journal/journal-verify.c +++ b/src/libsystemd/sd-journal/journal-verify.c @@ -170,16 +170,16 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o return -EBADMSG; } - if (le64toh(o->object.size) - offsetof(Object, data.payload) <= 0) { + if (le64toh(o->object.size) - journal_file_data_payload_offset(f) <= 0) { error(offset, "Bad object size (<= %zu): %"PRIu64, - offsetof(Object, data.payload), + journal_file_data_payload_offset(f), le64toh(o->object.size)); return -EBADMSG; } h1 = le64toh(o->data.hash); - r = hash_payload(f, o, offset, o->data.payload, - le64toh(o->object.size) - offsetof(Object, data.payload), + r = hash_payload(f, o, offset, journal_file_data_payload_field(f, o), + le64toh(o->object.size) - journal_file_data_payload_offset(f), &h2); if (r < 0) return r;