diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h index 54260c97b0..431f46bb59 100644 --- a/src/journal/journal-def.h +++ b/src/journal/journal-def.h @@ -147,18 +147,22 @@ enum { enum { HEADER_INCOMPATIBLE_COMPRESSED_XZ = 1 << 0, HEADER_INCOMPATIBLE_COMPRESSED_LZ4 = 1 << 1, + HEADER_INCOMPATIBLE_KEYED_HASH = 1 << 2, }; -#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4) +#define HEADER_INCOMPATIBLE_ANY \ + (HEADER_INCOMPATIBLE_COMPRESSED_XZ| \ + HEADER_INCOMPATIBLE_COMPRESSED_LZ4| \ + HEADER_INCOMPATIBLE_KEYED_HASH) #if HAVE_XZ && HAVE_LZ4 # define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY #elif HAVE_XZ -# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ +# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH) #elif HAVE_LZ4 -# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4 +# define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH) #else -# define HEADER_INCOMPATIBLE_SUPPORTED 0 +# define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH #endif enum { diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c index c77a9436e6..8ae966a6b2 100644 --- a/src/journal/journal-file.c +++ b/src/journal/journal-file.c @@ -16,6 +16,7 @@ #include "btrfs-util.h" #include "chattr-util.h" #include "compress.h" +#include "env-util.h" #include "fd-util.h" #include "format-util.h" #include "fs-util.h" @@ -419,7 +420,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) { h.incompatible_flags |= htole32( f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ | - f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4); + f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 | + f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH); h.compatible_flags = htole32( f->seal * HEADER_COMPATIBLE_SEALED); @@ -486,16 +488,21 @@ static 
bool warn_wrong_flags(const JournalFile *f, bool compatible) { f->path, type, flags & ~any); flags = (flags & any) & ~supported; if (flags) { - const char* strv[3]; + const char* strv[4]; unsigned n = 0; _cleanup_free_ char *t = NULL; - if (compatible && (flags & HEADER_COMPATIBLE_SEALED)) - strv[n++] = "sealed"; - if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)) - strv[n++] = "xz-compressed"; - if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)) - strv[n++] = "lz4-compressed"; + if (compatible) { + if (flags & HEADER_COMPATIBLE_SEALED) + strv[n++] = "sealed"; + } else { + if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ) + strv[n++] = "xz-compressed"; + if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4) + strv[n++] = "lz4-compressed"; + if (flags & HEADER_INCOMPATIBLE_KEYED_HASH) + strv[n++] = "keyed-hash"; + } strv[n] = NULL; assert(n < ELEMENTSOF(strv)); @@ -595,6 +602,8 @@ static int journal_file_verify_header(JournalFile *f) { f->seal = JOURNAL_HEADER_SEALED(f->header); + f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header); + return 0; } @@ -1334,21 +1343,35 @@ int journal_file_find_field_object_with_hash( return 0; } +uint64_t journal_file_hash_data( + JournalFile *f, + const void *data, + size_t sz) { + + assert(f); + assert(data || sz == 0); + + /* We try to unify our codebase on siphash, hence new-styled journal files utilizing the keyed hash + * function use siphash. Old journal files use the Jenkins hash. 
*/ + + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + return siphash24(data, sz, f->header->file_id.bytes); + + return jenkins_hash64(data, sz); +} + int journal_file_find_field_object( JournalFile *f, const void *field, uint64_t size, Object **ret, uint64_t *ret_offset) { - uint64_t hash; - assert(f); assert(field && size > 0); - hash = jenkins_hash64(field, size); - return journal_file_find_field_object_with_hash( f, - field, size, hash, + field, size, + journal_file_hash_data(f, field, size), ret, ret_offset); } @@ -1446,16 +1469,13 @@ int journal_file_find_data_object( const void *data, uint64_t size, Object **ret, uint64_t *ret_offset) { - uint64_t hash; - assert(f); assert(data || size == 0); - hash = jenkins_hash64(data, size); - return journal_file_find_data_object_with_hash( f, - data, size, hash, + data, size, + journal_file_hash_data(f, data, size), ret, ret_offset); } @@ -1472,7 +1492,7 @@ static int journal_file_append_field( assert(f); assert(field && size > 0); - hash = jenkins_hash64(field, size); + hash = journal_file_hash_data(f, field, size); r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p); if (r < 0) @@ -1535,7 +1555,7 @@ static int journal_file_append_data( assert(f); assert(data || size == 0); - hash = jenkins_hash64(data, size); + hash = journal_file_hash_data(f, data, size); r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p); if (r < 0) @@ -2028,7 +2048,20 @@ int journal_file_append_entry( if (r < 0) return r; - xor_hash ^= le64toh(o->data.hash); + /* When calculating the XOR hash field, we need to take special care if the "keyed-hash" + * journal file flag is on. We use the XOR hash field to quickly determine the identity of a + * specific record, and give records with otherwise identical position (i.e. match in seqno, + * timestamp, …) a stable ordering. But for that we can't have it that the hash of the + * objects in each file is different since they are keyed. 
Hence let's calculate the Jenkins + * hash here for that. This also has the benefit that cursors for old and new journal files + * are completely identical (they include the XOR hash after all). For classic Jenkins-hash + * files things are easier, we can just take the value from the stored record directly. */ + + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len); + else + xor_hash ^= le64toh(o->data.hash); + items[i].object_offset = htole64(p); items[i].hash = o->data.hash; } @@ -3149,7 +3182,7 @@ void journal_file_print_header(JournalFile *f) { "Sequential number ID: %s\n" "State: %s\n" "Compatible flags:%s%s\n" - "Incompatible flags:%s%s%s\n" + "Incompatible flags:%s%s%s%s\n" "Header size: %"PRIu64"\n" "Arena size: %"PRIu64"\n" "Data hash table size: %"PRIu64"\n" @@ -3174,6 +3207,7 @@ void journal_file_print_header(JournalFile *f) { (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "", JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "", JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "", + JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "", (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" 
: "", le64toh(f->header->header_size), le64toh(f->header->arena_size), @@ -3299,19 +3333,31 @@ int journal_file_open( #endif }; + /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if + * people really want that */ + r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH"); + if (r < 0) { + if (r != -ENXIO) + log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring."); + f->keyed_hash = true; + } else + f->keyed_hash = r; + if (DEBUG_LOGGING) { - static int last_seal = -1, last_compress = -1; + static int last_seal = -1, last_compress = -1, last_keyed_hash = -1; static uint64_t last_bytes = UINT64_MAX; char bytes[FORMAT_BYTES_MAX]; if (last_seal != f->seal || + last_keyed_hash != f->keyed_hash || last_compress != JOURNAL_FILE_COMPRESS(f) || last_bytes != f->compress_threshold_bytes) { - log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s", - yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)), + log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s", + yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)), format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes)); last_seal = f->seal; + last_keyed_hash = f->keyed_hash; last_compress = JOURNAL_FILE_COMPRESS(f); last_bytes = f->compress_threshold_bytes; } @@ -3769,7 +3815,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6 if (r < 0) return r; - xor_hash ^= le64toh(u->data.hash); + if (JOURNAL_HEADER_KEYED_HASH(to->header)) + xor_hash ^= jenkins_hash64(data, l); + else + xor_hash ^= le64toh(u->data.hash); + items[i].object_offset = htole64(h); items[i].hash = u->data.hash; diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h index 121e9153a6..732c2f31cd 100644 --- a/src/journal/journal-file.h +++ b/src/journal/journal-file.h @@ -71,6 +71,7 @@ typedef struct JournalFile { bool defrag_on_close:1; 
bool close_fd:1; bool archive:1; + bool keyed_hash:1; direction_t last_direction; LocationType location_type; @@ -195,6 +196,9 @@ static inline bool VALID_EPOCH(uint64_t u) { #define JOURNAL_HEADER_COMPRESSED_LZ4(h) \ FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4) +#define JOURNAL_HEADER_KEYED_HASH(h) \ + FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH) + int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret); uint64_t journal_file_entry_n_items(Object *o) _pure_; @@ -262,3 +266,5 @@ static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) { assert(f); return f->compress_xz || f->compress_lz4; } + +uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz); diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h index 028f0d9055..a649acf634 100644 --- a/src/journal/journal-internal.h +++ b/src/journal/journal-internal.h @@ -32,7 +32,7 @@ struct Match { /* For concrete matches */ char *data; size_t size; - uint64_t hash; + uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */ /* For terms */ LIST_HEAD(Match, matches); diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c index c70ab7aa24..fe9997bc14 100644 --- a/src/journal/journal-verify.c +++ b/src/journal/journal-verify.c @@ -163,9 +163,9 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o return r; } - h2 = jenkins_hash64(b, b_size); + h2 = journal_file_hash_data(f, b, b_size); } else - h2 = jenkins_hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); + h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload)); if (h1 != h2) { error(offset, "Invalid hash (%08"PRIx64" vs. 
%08"PRIx64, h1, h2); diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c index 5ddca5f93a..515bb82621 100644 --- a/src/journal/sd-journal.c +++ b/src/journal/sd-journal.c @@ -279,6 +279,8 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size) assert(j->level1->type == MATCH_OR_TERM); assert(j->level2->type == MATCH_AND_TERM); + /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing + * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */ hash = jenkins_hash64(data, size); LIST_FOREACH(matches, l3, j->level2->matches) { @@ -501,9 +503,16 @@ static int next_for_match( assert(f); if (m->type == MATCH_DISCRETE) { - uint64_t dp; + uint64_t dp, hash; - r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp); + /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise + * we can use what we pre-calculated. 
*/ + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + hash = journal_file_hash_data(f, m->data, m->size); + else + hash = m->hash; + + r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp); if (r <= 0) return r; @@ -590,9 +599,14 @@ static int find_location_for_match( assert(f); if (m->type == MATCH_DISCRETE) { - uint64_t dp; + uint64_t dp, hash; - r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp); + if (JOURNAL_HEADER_KEYED_HASH(f->header)) + hash = journal_file_hash_data(f, m->data, m->size); + else + hash = m->hash; + + r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp); if (r <= 0) return r; diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c index 6d97bc5ce8..50aab11c6a 100644 --- a/src/journal/test-journal-stream.c +++ b/src/journal/test-journal-stream.c @@ -58,7 +58,7 @@ static void verify_contents(sd_journal *j, unsigned skip) { assert_se(i == N_ENTRIES); } -int main(int argc, char *argv[]) { +static void run_test(void) { JournalFile *one, *two, *three; char t[] = "/var/tmp/journal-stream-XXXXXX"; unsigned i; @@ -68,12 +68,6 @@ int main(int argc, char *argv[]) { size_t l; dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL; - /* journal_file_open requires a valid machine id */ - if (access("/etc/machine-id", F_OK) != 0) - return log_tests_skipped("/etc/machine-id not found"); - - test_setup_logging(LOG_DEBUG); - assert_se(mkdtemp(t)); assert_se(chdir(t) >= 0); (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL); @@ -177,6 +171,22 @@ int main(int argc, char *argv[]) { printf("%.*s\n", (int) l, (const char*) data); assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0); +} + +int main(int argc, char *argv[]) { + + /* journal_file_open requires a valid machine id */ + if (access("/etc/machine-id", F_OK) != 0) + return log_tests_skipped("/etc/machine-id not found"); + + test_setup_logging(LOG_DEBUG); + + /* Run this test twice. 
Once with new (keyed) hashing and once with old (Jenkins) hashing */ + assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0); + run_test(); + + assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0); + run_test(); return 0; }