diff --git a/docs/JOURNAL_FILE_FORMAT.md b/docs/JOURNAL_FILE_FORMAT.md index 2d0debd858c..712f3bce36d 100644 --- a/docs/JOURNAL_FILE_FORMAT.md +++ b/docs/JOURNAL_FILE_FORMAT.md @@ -151,7 +151,7 @@ _packed_ struct Header { uint8_t reserved[7]; sd_id128_t file_id; sd_id128_t machine_id; - sd_id128_t boot_id; /* last writer */ + sd_id128_t tail_entry_boot_id; sd_id128_t seqnum_id; le64_t header_size; le64_t arena_size; @@ -192,8 +192,18 @@ new one. When journal file is first created the **file_id** is randomly and uniquely initialized. -When a writer opens a file it shall initialize the **boot_id** to the current -boot id of the system. +When a writer creates a file it shall initialize the **tail_entry_boot_id** to +the current boot ID of the system. When appending an entry it shall update the +field to the boot ID of that entry, so that it is guaranteed that the +**tail_entry_monotonic** field refers to a timestamp of the monotonic clock +associated with the boot with the ID indicated by the **tail_entry_boot_id** +field. (Compatibility note: in older versions of the journal, the field was +also supposed to be updated whenever the file was opened for any form of +writing, including when opened to mark it as archived. This behaviour has been +deemed problematic since without an associated boot ID the +**tail_entry_monotonic** field is useless. To indicate that the boot ID is +updated only on append, the HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID flag is set. If it +is not set, the **tail_entry_monotonic** field is not usable). The currently used part of the file is the **header_size** plus the **arena_size** field of the header. If a writer needs to write to a file where @@ -222,7 +232,12 @@ timestamp of the last or first entry in the file, respectively, or 0 if no entry has been written yet. **tail_entry_monotonic** is the monotonic timestamp of the last entry in the -file, referring to monotonic time of the boot identified by **boot_id**. 
+file, referring to monotonic time of the boot identified by +**tail_entry_boot_id**, but only if the +HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID feature flag is set, see above. If it +is not set, this field might refer to a different boot than the one in the +**tail_entry_boot_id** field, for example when the file was ultimately +archived. **data_hash_chain_depth** is a counter of the deepest chain in the data hash table, minus one. This is updated whenever a chain is found that is longer than @@ -268,7 +283,8 @@ enum { }; enum { - HEADER_COMPATIBLE_SEALED = 1 << 0, + HEADER_COMPATIBLE_SEALED = 1 << 0, + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1, }; ``` @@ -288,6 +304,12 @@ format that uses less space on disk compared to the original format. HEADER_COMPATIBLE_SEALED indicates that the file includes TAG objects required for Forward Secure Sealing. +HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID indicates whether the +**tail_entry_boot_id** field is strictly updated on initial creation of the +file and whenever an entry is appended (in which case the flag is set), or also +when the file is archived (in which case it is unset). New files should always +set this flag (and thus not update the **tail_entry_boot_id** except when +creating the file and when appending an entry to it). 
## Dirty Detection diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c index dd31007a4de..de08c4e9651 100644 --- a/src/journal/journald-server.c +++ b/src/journal/journald-server.c @@ -873,10 +873,6 @@ static bool shall_try_append_again(JournalFile *f, int r) { log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file has been deleted, rotating.", f->path); return true; - case -ETXTBSY: /* Journal file is from the future */ - log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file is from the future, rotating.", f->path); - return true; - case -EREMCHG: /* Wallclock time (CLOCK_REALTIME) jumped backwards relative to last journal entry */ log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Realtime clock jumped backwards relative to last journal entry, rotating.", f->path); return true; diff --git a/src/journal/managed-journal-file.c b/src/journal/managed-journal-file.c index 81aecfe7cb9..538d999de0a 100644 --- a/src/journal/managed-journal-file.c +++ b/src/journal/managed-journal-file.c @@ -542,8 +542,7 @@ int managed_journal_file_open_reliably( -EBUSY, /* Unclean shutdown */ -ESHUTDOWN, /* Already archived */ -EIO, /* IO error, including SIGBUS on mmap */ - -EIDRM, /* File has been deleted */ - -ETXTBSY)) /* File is from the future */ + -EIDRM)) /* File has been deleted */ return r; if ((open_flags & O_ACCMODE) == O_RDONLY) diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c index 55d717da316..7fec6d9eea2 100644 --- a/src/journal/test-journal-interleaving.c +++ b/src/journal/test-journal-interleaving.c @@ -230,7 +230,7 @@ static void test_sequence_numbers_one(void) { assert_se(one->file->header->state == STATE_ONLINE); assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->machine_id)); - assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->boot_id)); + assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->tail_entry_boot_id)); 
assert_se(sd_id128_equal(one->file->header->file_id, one->file->header->seqnum_id)); memcpy(&seqnum_id, &one->file->header->seqnum_id, sizeof(sd_id128_t)); @@ -241,7 +241,7 @@ static void test_sequence_numbers_one(void) { assert_se(two->file->header->state == STATE_ONLINE); assert_se(!sd_id128_equal(two->file->header->file_id, one->file->header->file_id)); assert_se(sd_id128_equal(one->file->header->machine_id, one->file->header->machine_id)); - assert_se(sd_id128_equal(one->file->header->boot_id, one->file->header->boot_id)); + assert_se(sd_id128_equal(one->file->header->tail_entry_boot_id, one->file->header->tail_entry_boot_id)); assert_se(sd_id128_equal(one->file->header->seqnum_id, one->file->header->seqnum_id)); append_number(two, 3, &seqnum); diff --git a/src/libsystemd/sd-journal/journal-authenticate.c b/src/libsystemd/sd-journal/journal-authenticate.c index 3c5d9d7e497..159e2153679 100644 --- a/src/libsystemd/sd-journal/journal-authenticate.c +++ b/src/libsystemd/sd-journal/journal-authenticate.c @@ -300,7 +300,7 @@ int journal_file_hmac_put_header(JournalFile *f) { * n_entry_arrays. 
*/ gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature)); - gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id)); + gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, tail_entry_boot_id) - offsetof(Header, file_id)); gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id)); gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset)); diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h index d35290d3c70..fb22fc45f30 100644 --- a/src/libsystemd/sd-journal/journal-def.h +++ b/src/libsystemd/sd-journal/journal-def.h @@ -173,32 +173,31 @@ enum { HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2, HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3, HEADER_INCOMPATIBLE_COMPACT = 1 << 4, + + HEADER_INCOMPATIBLE_ANY = HEADER_INCOMPATIBLE_COMPRESSED_XZ | + HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | + HEADER_INCOMPATIBLE_KEYED_HASH | + HEADER_INCOMPATIBLE_COMPRESSED_ZSTD | + HEADER_INCOMPATIBLE_COMPACT, + + HEADER_INCOMPATIBLE_SUPPORTED = (HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) | + (HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) | + (HAVE_ZSTD ? HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) | + HEADER_INCOMPATIBLE_KEYED_HASH | + HEADER_INCOMPATIBLE_COMPACT, }; -#define HEADER_INCOMPATIBLE_ANY \ - (HEADER_INCOMPATIBLE_COMPRESSED_XZ | \ - HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | \ - HEADER_INCOMPATIBLE_KEYED_HASH | \ - HEADER_INCOMPATIBLE_COMPRESSED_ZSTD | \ - HEADER_INCOMPATIBLE_COMPACT) - -#define HEADER_INCOMPATIBLE_SUPPORTED \ - ((HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) | \ - (HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) | \ - (HAVE_ZSTD ? 
HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) | \ - HEADER_INCOMPATIBLE_KEYED_HASH | \ - HEADER_INCOMPATIBLE_COMPACT) enum { - HEADER_COMPATIBLE_SEALED = 1 << 0, + HEADER_COMPATIBLE_SEALED = 1 << 0, + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1, /* if set, the tail_entry_boot_id field in the header is exclusively refreshed when an entry is appended */ + HEADER_COMPATIBLE_ANY = HEADER_COMPATIBLE_SEALED| + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID, + + HEADER_COMPATIBLE_SUPPORTED = (HAVE_GCRYPT ? HEADER_COMPATIBLE_SEALED : 0) | + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID, }; -#define HEADER_COMPATIBLE_ANY HEADER_COMPATIBLE_SEALED -#if HAVE_GCRYPT -# define HEADER_COMPATIBLE_SUPPORTED HEADER_COMPATIBLE_SEALED -#else -# define HEADER_COMPATIBLE_SUPPORTED 0 -#endif #define HEADER_SIGNATURE \ ((const uint8_t[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' }) @@ -211,7 +210,7 @@ enum { uint8_t reserved[7]; \ sd_id128_t file_id; \ sd_id128_t machine_id; \ - sd_id128_t boot_id; /* last writer */ \ + sd_id128_t tail_entry_boot_id; \ sd_id128_t seqnum_id; \ le64_t header_size; \ le64_t arena_size; \ diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c index aab33dbfcca..b1064a69820 100644 --- a/src/libsystemd/sd-journal/journal-file.c +++ b/src/libsystemd/sd-journal/journal-file.c @@ -357,7 +357,9 @@ static int journal_file_init_header( FLAGS_SET(file_flags, JOURNAL_COMPRESS) * COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(DEFAULT_COMPRESSION) | keyed_hash_requested() * HEADER_INCOMPATIBLE_KEYED_HASH | compact_mode_requested() * HEADER_INCOMPATIBLE_COMPACT), - .compatible_flags = htole32(seal * HEADER_COMPATIBLE_SEALED), + .compatible_flags = htole32( + (seal * HEADER_COMPATIBLE_SEALED) | + HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID), }; assert_cc(sizeof(h.signature) == sizeof(HEADER_SIGNATURE)); @@ -367,6 +369,11 @@ static int journal_file_init_header( if (r < 0) return r; + r = sd_id128_get_machine(&h.machine_id); + if (r < 0 && !ERRNO_IS_MACHINE_ID_UNSET(r)) 
+ return r; /* If we have no valid machine ID (test environment?), let's simply leave the + * machine ID field all zeroes. */ + if (template) { h.seqnum_id = template->header->seqnum_id; h.tail_entry_seqnum = template->header->tail_entry_seqnum; @@ -388,18 +395,8 @@ static int journal_file_refresh_header(JournalFile *f) { assert(f); assert(f->header); - r = sd_id128_get_machine(&f->header->machine_id); - if (r < 0) { - if (!ERRNO_IS_MACHINE_ID_UNSET(r)) - return r; - - /* don't have a machine-id, let's continue without */ - f->header->machine_id = SD_ID128_NULL; - } - - r = sd_id128_get_boot(&f->header->boot_id); - if (r < 0) - return r; + /* We used to update the header's boot ID field here, but we don't do that anymore, as per + * HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID */ r = journal_file_set_online(f); @@ -514,8 +511,12 @@ static int journal_file_verify_header(JournalFile *f) { int r; r = sd_id128_get_machine(&machine_id); - if (r < 0) - return r; + if (r < 0) { + if (!ERRNO_IS_MACHINE_ID_UNSET(r)) /* handle graceful if machine ID is not initialized yet */ + return r; + + machine_id = SD_ID128_NULL; + } if (!sd_id128_equal(machine_id, f->header->machine_id)) return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN), @@ -536,14 +537,6 @@ static int journal_file_verify_header(JournalFile *f) { if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0) return -EBADMSG; - - /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't - * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks - * bisection. 
*/ - if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) - return log_debug_errno(SYNTHETIC_ERRNO(ETXTBSY), - "Journal file %s is from the future, refusing to append new data to it that'd be older.", - f->path); } return 0; @@ -2090,6 +2083,7 @@ static int journal_file_append_entry_internal( JournalFile *f, const dual_timestamp *ts, const sd_id128_t *boot_id, + const sd_id128_t *machine_id, uint64_t xor_hash, const EntryItem items[], size_t n_items, @@ -2126,9 +2120,9 @@ static int journal_file_append_entry_internal( "timestamp %" PRIu64 ", refusing entry.", ts->realtime, le64toh(f->header->tail_entry_realtime)); - if (!sd_id128_is_null(f->header->boot_id) && boot_id) { + if (!sd_id128_is_null(f->header->tail_entry_boot_id) && boot_id) { - if (!sd_id128_equal(f->header->boot_id, *boot_id)) + if (!sd_id128_equal(f->header->tail_entry_boot_id, *boot_id)) return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE), "Boot ID to write is different from previous boot id, refusing entry."); @@ -2155,6 +2149,10 @@ static int journal_file_append_entry_internal( } } + if (machine_id && sd_id128_is_null(f->header->machine_id)) + /* Initialize machine ID when not set yet */ + f->header->machine_id = *machine_id; + osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f)); r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np); @@ -2166,8 +2164,8 @@ static int journal_file_append_entry_internal( o->entry.monotonic = htole64(ts->monotonic); o->entry.xor_hash = htole64(xor_hash); if (boot_id) - f->header->boot_id = *boot_id; - o->entry.boot_id = f->header->boot_id; + f->header->tail_entry_boot_id = *boot_id; + o->entry.boot_id = f->header->tail_entry_boot_id; for (size_t i = 0; i < n_items; i++) write_entry_item(f, o, i, &items[i]); @@ -2314,7 +2312,7 @@ int journal_file_append_entry( EntryItem *items; uint64_t xor_hash = 0; struct dual_timestamp _ts; - sd_id128_t _boot_id; + sd_id128_t _boot_id, _machine_id, *machine_id; int r; assert(f); @@ 
-2344,6 +2342,16 @@ int journal_file_append_entry( boot_id = &_boot_id; } + r = sd_id128_get_machine(&_machine_id); + if (r < 0) { + if (!ERRNO_IS_MACHINE_ID_UNSET(r)) + return r; + + /* If the machine ID is not initialized yet, handle gracefully */ + machine_id = NULL; + } else + machine_id = &_machine_id; + #if HAVE_GCRYPT r = journal_file_maybe_append_tag(f, ts->realtime); if (r < 0) @@ -2393,7 +2401,18 @@ int journal_file_append_entry( typesafe_qsort(items, n_iovec, entry_item_cmp); n_iovec = remove_duplicate_entry_items(items, n_iovec); - r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, seqnum_id, ret_object, ret_offset); + r = journal_file_append_entry_internal( + f, + ts, + boot_id, + machine_id, + xor_hash, + items, + n_iovec, + seqnum, + seqnum_id, + ret_object, + ret_offset); /* If the memory mapping triggered a SIGBUS then we return an * IO error and ignore the error code passed down to us, since @@ -3567,7 +3586,7 @@ void journal_file_print_header(JournalFile *f) { "Boot ID: %s\n" "Sequential number ID: %s\n" "State: %s\n" - "Compatible flags:%s%s\n" + "Compatible flags:%s%s%s\n" "Incompatible flags:%s%s%s%s%s%s\n" "Header size: %"PRIu64"\n" "Arena size: %"PRIu64"\n" @@ -3584,12 +3603,13 @@ void journal_file_print_header(JournalFile *f) { f->path, SD_ID128_TO_STRING(f->header->file_id), SD_ID128_TO_STRING(f->header->machine_id), - SD_ID128_TO_STRING(f->header->boot_id), + SD_ID128_TO_STRING(f->header->tail_entry_boot_id), SD_ID128_TO_STRING(f->header->seqnum_id), f->header->state == STATE_OFFLINE ? "OFFLINE" : f->header->state == STATE_ONLINE ? "ONLINE" : f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN", JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "", + JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) ? " TAIL_ENTRY_BOOT_ID" : "", (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "", JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? 
" COMPRESSED-XZ" : "", JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "", @@ -4192,7 +4212,18 @@ int journal_file_copy_entry( return r; } - r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, seqnum, seqnum_id, NULL, NULL); + r = journal_file_append_entry_internal( + to, + &ts, + boot_id, + &from->header->machine_id, + xor_hash, + items, + n, + seqnum, + seqnum_id, + /* ret_object= */ NULL, + /* ret_offset= */ NULL); if (mmap_cache_fd_got_sigbus(to->cache_fd)) return -EIO; diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h index 07f1f5d1806..c1f1ab4e4fc 100644 --- a/src/libsystemd/sd-journal/journal-file.h +++ b/src/libsystemd/sd-journal/journal-file.h @@ -180,6 +180,9 @@ static inline bool VALID_EPOCH(uint64_t u) { #define JOURNAL_HEADER_SEALED(h) \ FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED) +#define JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(h) \ + FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID) + #define JOURNAL_HEADER_COMPRESSED_XZ(h) \ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ) diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c index b4ce3881a44..8232f53eb66 100644 --- a/src/libsystemd/sd-journal/journal-verify.c +++ b/src/libsystemd/sd-journal/journal-verify.c @@ -1297,7 +1297,8 @@ int journal_file_verify( } if (entry_monotonic_set && - (sd_id128_equal(entry_boot_id, f->header->boot_id) && + (sd_id128_equal(entry_boot_id, f->header->tail_entry_boot_id) && + JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) && entry_monotonic != le64toh(f->header->tail_entry_monotonic))) { error(0, "Invalid tail monotonic timestamp (%"PRIu64" != %"PRIu64")",