1
0
mirror of https://github.com/systemd/systemd.git synced 2024-10-29 21:55:36 +03:00

Merge pull request #26265 from poettering/journal-refresh-fixes

journal: journal file header IDs refresh fixes and corrections
This commit is contained in:
Lennart Poettering 2023-02-21 18:19:12 +01:00 committed by GitHub
commit b469b969f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 119 additions and 68 deletions

View File

@ -151,7 +151,7 @@ _packed_ struct Header {
uint8_t reserved[7];
sd_id128_t file_id;
sd_id128_t machine_id;
sd_id128_t boot_id; /* last writer */
sd_id128_t tail_entry_boot_id;
sd_id128_t seqnum_id;
le64_t header_size;
le64_t arena_size;
@ -192,8 +192,18 @@ new one.
When journal file is first created the **file_id** is randomly and uniquely
initialized.
When a writer opens a file it shall initialize the **boot_id** to the current
boot id of the system.
When a writer creates a file it shall initialize the **tail_entry_boot_id** to
the current boot ID of the system. When appending an entry it shall update the
field to the boot ID of that entry, so that it is guaranteed that the
**tail_entry_monotonic** field refers to a timestamp of the monotonic clock
associated with the boot with the ID indicated by the **tail_entry_boot_id**
field. (Compatibility note: in older versions of the journal, the field was
also supposed to be updated whenever the file was opened for any form of
writing, including when opened to mark it as archived. This behaviour has been
deemed problematic since without an associated boot ID the
**tail_entry_monotonic** field is useless. To indicate whether the boot ID is
updated only on append, the HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID flag is set. If
it is not set, the **tail_entry_monotonic** field is not usable.)
The currently used part of the file is the **header_size** plus the
**arena_size** field of the header. If a writer needs to write to a file where
@ -222,7 +232,12 @@ timestamp of the last or first entry in the file, respectively, or 0 if no
entry has been written yet.
**tail_entry_monotonic** is the monotonic timestamp of the last entry in the
file, referring to monotonic time of the boot identified by **boot_id**.
file, referring to monotonic time of the boot identified by
**tail_entry_boot_id**, but only if the
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID feature flag is set, see above. If it
is not set, this field might refer to a different boot than the one in the
**tail_entry_boot_id** field, for example when the file was ultimately
archived.
**data_hash_chain_depth** is a counter of the deepest chain in the data hash
table, minus one. This is updated whenever a chain is found that is longer than
@ -268,7 +283,8 @@ enum {
};
enum {
HEADER_COMPATIBLE_SEALED = 1 << 0,
HEADER_COMPATIBLE_SEALED = 1 << 0,
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1,
};
```
@ -288,6 +304,12 @@ format that uses less space on disk compared to the original format.
HEADER_COMPATIBLE_SEALED indicates that the file includes TAG objects required
for Forward Secure Sealing.
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID indicates whether the
**tail_entry_boot_id** field is strictly updated on initial creation of the
file and whenever an entry is appended (in which case the flag is set), or also
when the file is archived (in which case it is unset). New files should always
set this flag (and thus not update the **tail_entry_boot_id** except when
creating the file and when appending an entry to it).
## Dirty Detection

View File

@ -873,10 +873,6 @@ static bool shall_try_append_again(JournalFile *f, int r) {
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file has been deleted, rotating.", f->path);
return true;
case -ETXTBSY: /* Journal file is from the future */
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file is from the future, rotating.", f->path);
return true;
case -EREMCHG: /* Wallclock time (CLOCK_REALTIME) jumped backwards relative to last journal entry */
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Realtime clock jumped backwards relative to last journal entry, rotating.", f->path);
return true;

View File

@ -542,8 +542,7 @@ int managed_journal_file_open_reliably(
-EBUSY, /* Unclean shutdown */
-ESHUTDOWN, /* Already archived */
-EIO, /* IO error, including SIGBUS on mmap */
-EIDRM, /* File has been deleted */
-ETXTBSY)) /* File is from the future */
-EIDRM)) /* File has been deleted */
return r;
if ((open_flags & O_ACCMODE) == O_RDONLY)

View File

@ -230,7 +230,7 @@ static void test_sequence_numbers_one(void) {
assert_se(one->file->header->state == STATE_ONLINE);
assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->machine_id));
assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->boot_id));
assert_se(!sd_id128_equal(one->file->header->file_id, one->file->header->tail_entry_boot_id));
assert_se(sd_id128_equal(one->file->header->file_id, one->file->header->seqnum_id));
memcpy(&seqnum_id, &one->file->header->seqnum_id, sizeof(sd_id128_t));
@ -241,7 +241,7 @@ static void test_sequence_numbers_one(void) {
assert_se(two->file->header->state == STATE_ONLINE);
assert_se(!sd_id128_equal(two->file->header->file_id, one->file->header->file_id));
assert_se(sd_id128_equal(one->file->header->machine_id, one->file->header->machine_id));
assert_se(sd_id128_equal(one->file->header->boot_id, one->file->header->boot_id));
assert_se(sd_id128_equal(one->file->header->tail_entry_boot_id, one->file->header->tail_entry_boot_id));
assert_se(sd_id128_equal(one->file->header->seqnum_id, one->file->header->seqnum_id));
append_number(two, 3, &seqnum);

View File

@ -300,7 +300,7 @@ int journal_file_hmac_put_header(JournalFile *f) {
* n_entry_arrays. */
gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, tail_entry_boot_id) - offsetof(Header, file_id));
gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));

View File

@ -173,32 +173,31 @@ enum {
HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2,
HEADER_INCOMPATIBLE_COMPRESSED_ZSTD = 1 << 3,
HEADER_INCOMPATIBLE_COMPACT = 1 << 4,
HEADER_INCOMPATIBLE_ANY = HEADER_INCOMPATIBLE_COMPRESSED_XZ |
HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
HEADER_INCOMPATIBLE_KEYED_HASH |
HEADER_INCOMPATIBLE_COMPRESSED_ZSTD |
HEADER_INCOMPATIBLE_COMPACT,
HEADER_INCOMPATIBLE_SUPPORTED = (HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) |
(HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) |
(HAVE_ZSTD ? HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) |
HEADER_INCOMPATIBLE_KEYED_HASH |
HEADER_INCOMPATIBLE_COMPACT,
};
#define HEADER_INCOMPATIBLE_ANY \
(HEADER_INCOMPATIBLE_COMPRESSED_XZ | \
HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | \
HEADER_INCOMPATIBLE_KEYED_HASH | \
HEADER_INCOMPATIBLE_COMPRESSED_ZSTD | \
HEADER_INCOMPATIBLE_COMPACT)
#define HEADER_INCOMPATIBLE_SUPPORTED \
((HAVE_XZ ? HEADER_INCOMPATIBLE_COMPRESSED_XZ : 0) | \
(HAVE_LZ4 ? HEADER_INCOMPATIBLE_COMPRESSED_LZ4 : 0) | \
(HAVE_ZSTD ? HEADER_INCOMPATIBLE_COMPRESSED_ZSTD : 0) | \
HEADER_INCOMPATIBLE_KEYED_HASH | \
HEADER_INCOMPATIBLE_COMPACT)
enum {
HEADER_COMPATIBLE_SEALED = 1 << 0,
HEADER_COMPATIBLE_SEALED = 1 << 0,
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID = 1 << 1, /* if set, the tail_entry_boot_id field in the header is exclusively refreshed when an entry is appended */
HEADER_COMPATIBLE_ANY = HEADER_COMPATIBLE_SEALED|
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID,
HEADER_COMPATIBLE_SUPPORTED = (HAVE_GCRYPT ? HEADER_COMPATIBLE_SEALED : 0) |
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID,
};
#define HEADER_COMPATIBLE_ANY HEADER_COMPATIBLE_SEALED
#if HAVE_GCRYPT
# define HEADER_COMPATIBLE_SUPPORTED HEADER_COMPATIBLE_SEALED
#else
# define HEADER_COMPATIBLE_SUPPORTED 0
#endif
#define HEADER_SIGNATURE \
((const uint8_t[]) { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' })
@ -211,7 +210,7 @@ enum {
uint8_t reserved[7]; \
sd_id128_t file_id; \
sd_id128_t machine_id; \
sd_id128_t boot_id; /* last writer */ \
sd_id128_t tail_entry_boot_id; \
sd_id128_t seqnum_id; \
le64_t header_size; \
le64_t arena_size; \

View File

@ -357,7 +357,9 @@ static int journal_file_init_header(
FLAGS_SET(file_flags, JOURNAL_COMPRESS) * COMPRESSION_TO_HEADER_INCOMPATIBLE_FLAG(DEFAULT_COMPRESSION) |
keyed_hash_requested() * HEADER_INCOMPATIBLE_KEYED_HASH |
compact_mode_requested() * HEADER_INCOMPATIBLE_COMPACT),
.compatible_flags = htole32(seal * HEADER_COMPATIBLE_SEALED),
.compatible_flags = htole32(
(seal * HEADER_COMPATIBLE_SEALED) |
HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID),
};
assert_cc(sizeof(h.signature) == sizeof(HEADER_SIGNATURE));
@ -367,6 +369,11 @@ static int journal_file_init_header(
if (r < 0)
return r;
r = sd_id128_get_machine(&h.machine_id);
if (r < 0 && !ERRNO_IS_MACHINE_ID_UNSET(r))
return r; /* If we have no valid machine ID (test environment?), let's simply leave the
* machine ID field all zeroes. */
if (template) {
h.seqnum_id = template->header->seqnum_id;
h.tail_entry_seqnum = template->header->tail_entry_seqnum;
@ -388,18 +395,8 @@ static int journal_file_refresh_header(JournalFile *f) {
assert(f);
assert(f->header);
r = sd_id128_get_machine(&f->header->machine_id);
if (r < 0) {
if (!ERRNO_IS_MACHINE_ID_UNSET(r))
return r;
/* don't have a machine-id, let's continue without */
f->header->machine_id = SD_ID128_NULL;
}
r = sd_id128_get_boot(&f->header->boot_id);
if (r < 0)
return r;
/* We used to update the header's boot ID field here, but we don't do that anymore, as per
* HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID */
r = journal_file_set_online(f);
@ -514,8 +511,12 @@ static int journal_file_verify_header(JournalFile *f) {
int r;
r = sd_id128_get_machine(&machine_id);
if (r < 0)
return r;
if (r < 0) {
if (!ERRNO_IS_MACHINE_ID_UNSET(r)) /* handle gracefully if machine ID is not initialized yet */
return r;
machine_id = SD_ID128_NULL;
}
if (!sd_id128_equal(machine_id, f->header->machine_id))
return log_debug_errno(SYNTHETIC_ERRNO(EHOSTDOWN),
@ -536,14 +537,6 @@ static int journal_file_verify_header(JournalFile *f) {
if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0)
return -EBADMSG;
/* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
* be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
* bisection. */
if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME))
return log_debug_errno(SYNTHETIC_ERRNO(ETXTBSY),
"Journal file %s is from the future, refusing to append new data to it that'd be older.",
f->path);
}
return 0;
@ -2090,6 +2083,7 @@ static int journal_file_append_entry_internal(
JournalFile *f,
const dual_timestamp *ts,
const sd_id128_t *boot_id,
const sd_id128_t *machine_id,
uint64_t xor_hash,
const EntryItem items[],
size_t n_items,
@ -2126,9 +2120,9 @@ static int journal_file_append_entry_internal(
"timestamp %" PRIu64 ", refusing entry.",
ts->realtime, le64toh(f->header->tail_entry_realtime));
if (!sd_id128_is_null(f->header->boot_id) && boot_id) {
if (!sd_id128_is_null(f->header->tail_entry_boot_id) && boot_id) {
if (!sd_id128_equal(f->header->boot_id, *boot_id))
if (!sd_id128_equal(f->header->tail_entry_boot_id, *boot_id))
return log_debug_errno(SYNTHETIC_ERRNO(EREMOTE),
"Boot ID to write is different from previous boot id, refusing entry.");
@ -2155,6 +2149,10 @@ static int journal_file_append_entry_internal(
}
}
if (machine_id && sd_id128_is_null(f->header->machine_id))
/* Initialize machine ID when not set yet */
f->header->machine_id = *machine_id;
osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
@ -2166,8 +2164,8 @@ static int journal_file_append_entry_internal(
o->entry.monotonic = htole64(ts->monotonic);
o->entry.xor_hash = htole64(xor_hash);
if (boot_id)
f->header->boot_id = *boot_id;
o->entry.boot_id = f->header->boot_id;
f->header->tail_entry_boot_id = *boot_id;
o->entry.boot_id = f->header->tail_entry_boot_id;
for (size_t i = 0; i < n_items; i++)
write_entry_item(f, o, i, &items[i]);
@ -2314,7 +2312,7 @@ int journal_file_append_entry(
EntryItem *items;
uint64_t xor_hash = 0;
struct dual_timestamp _ts;
sd_id128_t _boot_id;
sd_id128_t _boot_id, _machine_id, *machine_id;
int r;
assert(f);
@ -2344,6 +2342,16 @@ int journal_file_append_entry(
boot_id = &_boot_id;
}
r = sd_id128_get_machine(&_machine_id);
if (r < 0) {
if (!ERRNO_IS_MACHINE_ID_UNSET(r))
return r;
/* If the machine ID is not initialized yet, handle gracefully */
machine_id = NULL;
} else
machine_id = &_machine_id;
#if HAVE_GCRYPT
r = journal_file_maybe_append_tag(f, ts->realtime);
if (r < 0)
@ -2393,7 +2401,18 @@ int journal_file_append_entry(
typesafe_qsort(items, n_iovec, entry_item_cmp);
n_iovec = remove_duplicate_entry_items(items, n_iovec);
r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, seqnum_id, ret_object, ret_offset);
r = journal_file_append_entry_internal(
f,
ts,
boot_id,
machine_id,
xor_hash,
items,
n_iovec,
seqnum,
seqnum_id,
ret_object,
ret_offset);
/* If the memory mapping triggered a SIGBUS then we return an
* IO error and ignore the error code passed down to us, since
@ -3567,7 +3586,7 @@ void journal_file_print_header(JournalFile *f) {
"Boot ID: %s\n"
"Sequential number ID: %s\n"
"State: %s\n"
"Compatible flags:%s%s\n"
"Compatible flags:%s%s%s\n"
"Incompatible flags:%s%s%s%s%s%s\n"
"Header size: %"PRIu64"\n"
"Arena size: %"PRIu64"\n"
@ -3584,12 +3603,13 @@ void journal_file_print_header(JournalFile *f) {
f->path,
SD_ID128_TO_STRING(f->header->file_id),
SD_ID128_TO_STRING(f->header->machine_id),
SD_ID128_TO_STRING(f->header->boot_id),
SD_ID128_TO_STRING(f->header->tail_entry_boot_id),
SD_ID128_TO_STRING(f->header->seqnum_id),
f->header->state == STATE_OFFLINE ? "OFFLINE" :
f->header->state == STATE_ONLINE ? "ONLINE" :
f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) ? " TAIL_ENTRY_BOOT_ID" : "",
(le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
@ -4192,7 +4212,18 @@ int journal_file_copy_entry(
return r;
}
r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, seqnum, seqnum_id, NULL, NULL);
r = journal_file_append_entry_internal(
to,
&ts,
boot_id,
&from->header->machine_id,
xor_hash,
items,
n,
seqnum,
seqnum_id,
/* ret_object= */ NULL,
/* ret_offset= */ NULL);
if (mmap_cache_fd_got_sigbus(to->cache_fd))
return -EIO;

View File

@ -180,6 +180,9 @@ static inline bool VALID_EPOCH(uint64_t u) {
#define JOURNAL_HEADER_SEALED(h) \
FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_SEALED)
#define JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(h) \
FLAGS_SET(le32toh((h)->compatible_flags), HEADER_COMPATIBLE_TAIL_ENTRY_BOOT_ID)
#define JOURNAL_HEADER_COMPRESSED_XZ(h) \
FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_XZ)

View File

@ -1297,7 +1297,8 @@ int journal_file_verify(
}
if (entry_monotonic_set &&
(sd_id128_equal(entry_boot_id, f->header->boot_id) &&
(sd_id128_equal(entry_boot_id, f->header->tail_entry_boot_id) &&
JOURNAL_HEADER_TAIL_ENTRY_BOOT_ID(f->header) &&
entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
error(0,
"Invalid tail monotonic timestamp (%"PRIu64" != %"PRIu64")",