7 files changed, 128 insertions, 44 deletions
diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
index 54260c97b0..431f46bb59 100644
--- a/src/journal/journal-def.h
+++ b/src/journal/journal-def.h
@@ -147,18 +147,22 @@ enum {
 enum {
         HEADER_INCOMPATIBLE_COMPRESSED_XZ   = 1 << 0,
         HEADER_INCOMPATIBLE_COMPRESSED_LZ4  = 1 << 1,
+        HEADER_INCOMPATIBLE_KEYED_HASH      = 1 << 2,
 };
 
-#define HEADER_INCOMPATIBLE_ANY (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+#define HEADER_INCOMPATIBLE_ANY              \
+        (HEADER_INCOMPATIBLE_COMPRESSED_XZ|  \
+         HEADER_INCOMPATIBLE_COMPRESSED_LZ4| \
+         HEADER_INCOMPATIBLE_KEYED_HASH)
 
 #if HAVE_XZ && HAVE_LZ4
 #  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_ANY
 #elif HAVE_XZ
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_XZ
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_XZ|HEADER_INCOMPATIBLE_KEYED_HASH)
 #elif HAVE_LZ4
-#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_COMPRESSED_LZ4
+#  define HEADER_INCOMPATIBLE_SUPPORTED (HEADER_INCOMPATIBLE_COMPRESSED_LZ4|HEADER_INCOMPATIBLE_KEYED_HASH)
 #else
-#  define HEADER_INCOMPATIBLE_SUPPORTED 0
+#  define HEADER_INCOMPATIBLE_SUPPORTED HEADER_INCOMPATIBLE_KEYED_HASH
 #endif
 
 enum {
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index c77a9436e6..8ae966a6b2 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -16,6 +16,7 @@
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "compress.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "format-util.h"
 #include "fs-util.h"
@@ -419,7 +420,8 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         h.incompatible_flags |= htole32(
                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
-                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
+                f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4 |
+                f->keyed_hash * HEADER_INCOMPATIBLE_KEYED_HASH);
 
         h.compatible_flags = htole32(
                 f->seal * HEADER_COMPATIBLE_SEALED);
@@ -486,16 +488,21 @@ static bool warn_wrong_flags(const JournalFile *f, bool compatible) {
                                   f->path, type, flags & ~any);
                 flags = (flags & any) & ~supported;
                 if (flags) {
-                        const char* strv[3];
+                        const char* strv[4];
                         unsigned n = 0;
                         _cleanup_free_ char *t = NULL;
 
-                        if (compatible && (flags & HEADER_COMPATIBLE_SEALED))
-                                strv[n++] = "sealed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ))
-                                strv[n++] = "xz-compressed";
-                        if (!compatible && (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4))
-                                strv[n++] = "lz4-compressed";
+                        if (compatible) {
+                                if (flags & HEADER_COMPATIBLE_SEALED)
+                                        strv[n++] = "sealed";
+                        } else {
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ)
+                                        strv[n++] = "xz-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
+                                        strv[n++] = "lz4-compressed";
+                                if (flags & HEADER_INCOMPATIBLE_KEYED_HASH)
+                                        strv[n++] = "keyed-hash";
+                        }
                         strv[n] = NULL;
                         assert(n < ELEMENTSOF(strv));
 
@@ -595,6 +602,8 @@ static int journal_file_verify_header(JournalFile *f) {
 
         f->seal = JOURNAL_HEADER_SEALED(f->header);
 
+        f->keyed_hash = JOURNAL_HEADER_KEYED_HASH(f->header);
+
         return 0;
 }
 
@@ -1334,21 +1343,35 @@ int journal_file_find_field_object_with_hash(
         return 0;
 }
 
+uint64_t journal_file_hash_data(
+                JournalFile *f,
+                const void *data,
+                size_t sz) {
+
+        assert(f);
+        assert(data || sz == 0);
+
+        /* Hashes sz bytes at data for file f. We try to unify our codebase on siphash, hence new-style
+         * files (keyed-hash flag set) use siphash24 keyed by the file ID. Old files use the Jenkins hash. */
+
+        if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                return siphash24(data, sz, f->header->file_id.bytes);
+
+        return jenkins_hash64(data, sz);
+}
+
 int journal_file_find_field_object(
                 JournalFile *f,
                 const void *field, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
-
         return journal_file_find_field_object_with_hash(
                         f,
-                        field, size, hash,
+                        field, size,
+                        journal_file_hash_data(f, field, size),
                         ret, ret_offset);
 }
 
@@ -1446,16 +1469,13 @@ int journal_file_find_data_object(
                 const void *data, uint64_t size,
                 Object **ret, uint64_t *ret_offset) {
 
-        uint64_t hash;
-
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
-
         return journal_file_find_data_object_with_hash(
                         f,
-                        data, size, hash,
+                        data, size,
+                        journal_file_hash_data(f, data, size),
                         ret, ret_offset);
 }
 
@@ -1472,7 +1492,7 @@ static int journal_file_append_field(
         assert(f);
         assert(field && size > 0);
 
-        hash = jenkins_hash64(field, size);
+        hash = journal_file_hash_data(f, field, size);
 
         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
         if (r < 0)
@@ -1535,7 +1555,7 @@ static int journal_file_append_data(
         assert(f);
         assert(data || size == 0);
 
-        hash = jenkins_hash64(data, size);
+        hash = journal_file_hash_data(f, data, size);
 
         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
         if (r < 0)
@@ -2028,7 +2048,20 @@ int journal_file_append_entry(
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(o->data.hash);
+                /* When calculating the XOR hash field, we need to take special care if the "keyed-hash"
+                 * journal file flag is on. We use the XOR hash field to quickly determine the identity of a
+                 * specific record, and to give records with otherwise identical position (i.e. matching
+                 * seqno, timestamp, …) a stable ordering. That only works if the same data hashes to the same
+                 * value in every file, which keyed hashes do not guarantee. Hence let's calculate the Jenkins
+                 * hash here for that. This also has the benefit that cursors for old and new journal files
+                 * are completely identical (they include the XOR hash after all). For classic Jenkins-hash
+                 * files things are easier, we can just take the value from the stored record directly. */
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        xor_hash ^= jenkins_hash64(iovec[i].iov_base, iovec[i].iov_len);
+                else
+                        xor_hash ^= le64toh(o->data.hash);
+
                 items[i].object_offset = htole64(p);
                 items[i].hash = o->data.hash;
         }
@@ -3149,7 +3182,7 @@ void journal_file_print_header(JournalFile *f) {
                "Sequential number ID: %s\n"
                "State: %s\n"
                "Compatible flags:%s%s\n"
-               "Incompatible flags:%s%s%s\n"
+               "Incompatible flags:%s%s%s%s\n"
                "Header size: %"PRIu64"\n"
                "Arena size: %"PRIu64"\n"
                "Data hash table size: %"PRIu64"\n"
@@ -3174,6 +3207,7 @@ void journal_file_print_header(JournalFile *f) {
                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
+               JOURNAL_HEADER_KEYED_HASH(f->header) ? " KEYED-HASH" : "",
                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
                le64toh(f->header->header_size),
                le64toh(f->header->arena_size),
@@ -3299,19 +3333,31 @@ int journal_file_open(
 #endif
         };
 
+        /* We turn on keyed hashes by default, but provide an environment variable to turn them off, if
+         * people really want that */
+        r = getenv_bool("SYSTEMD_JOURNAL_KEYED_HASH");
+        if (r < 0) {
+                if (r != -ENXIO)
+                        log_debug_errno(r, "Failed to parse $SYSTEMD_JOURNAL_KEYED_HASH environment variable, ignoring.");
+                f->keyed_hash = true;
+        } else
+                f->keyed_hash = r;
+
         if (DEBUG_LOGGING) {
-                static int last_seal = -1, last_compress = -1;
+                static int last_seal = -1, last_compress = -1, last_keyed_hash = -1;
                 static uint64_t last_bytes = UINT64_MAX;
                 char bytes[FORMAT_BYTES_MAX];
 
                 if (last_seal != f->seal ||
+                    last_keyed_hash != f->keyed_hash ||
                     last_compress != JOURNAL_FILE_COMPRESS(f) ||
                     last_bytes != f->compress_threshold_bytes) {
 
-                        log_debug("Journal effective settings seal=%s compress=%s compress_threshold_bytes=%s",
-                                  yes_no(f->seal), yes_no(JOURNAL_FILE_COMPRESS(f)),
+                        log_debug("Journal effective settings seal=%s keyed_hash=%s compress=%s compress_threshold_bytes=%s",
+                                  yes_no(f->seal), yes_no(f->keyed_hash), yes_no(JOURNAL_FILE_COMPRESS(f)),
                                   format_bytes(bytes, sizeof bytes, f->compress_threshold_bytes));
                         last_seal = f->seal;
+                        last_keyed_hash = f->keyed_hash;
                         last_compress = JOURNAL_FILE_COMPRESS(f);
                         last_bytes = f->compress_threshold_bytes;
                 }
@@ -3769,7 +3815,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
                 if (r < 0)
                         return r;
 
-                xor_hash ^= le64toh(u->data.hash);
+                if (JOURNAL_HEADER_KEYED_HASH(to->header))
+                        xor_hash ^= jenkins_hash64(data, l);
+                else
+                        xor_hash ^= le64toh(u->data.hash);
+
                 items[i].object_offset = htole64(h);
                 items[i].hash = u->data.hash;
 
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 121e9153a6..732c2f31cd 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -71,6 +71,7 @@ typedef struct JournalFile {
         bool defrag_on_close:1;
         bool close_fd:1;
         bool archive:1;
+        bool keyed_hash:1;
 
         direction_t last_direction;
         LocationType location_type;
@@ -195,6 +196,9 @@ static inline bool VALID_EPOCH(uint64_t u) {
 #define JOURNAL_HEADER_COMPRESSED_LZ4(h) \
         FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_COMPRESSED_LZ4)
 
+#define JOURNAL_HEADER_KEYED_HASH(h) \
+        FLAGS_SET(le32toh((h)->incompatible_flags), HEADER_INCOMPATIBLE_KEYED_HASH)
+
 int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset, Object **ret);
 
 uint64_t journal_file_entry_n_items(Object *o) _pure_;
@@ -262,3 +266,5 @@ static inline bool JOURNAL_FILE_COMPRESS(JournalFile *f) {
         assert(f);
         return f->compress_xz || f->compress_lz4;
 }
+
+uint64_t journal_file_hash_data(JournalFile *f, const void *data, size_t sz);
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
index 028f0d9055..a649acf634 100644
--- a/src/journal/journal-internal.h
+++ b/src/journal/journal-internal.h
@@ -32,7 +32,7 @@ struct Match {
         /* For concrete matches */
         char *data;
         size_t size;
-        uint64_t hash;
+        uint64_t hash; /* old-style jenkins hash. New-style siphash is different per file, hence won't be cached here */
 
         /* For terms */
         LIST_HEAD(Match, matches);
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index c70ab7aa24..fe9997bc14 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -163,9 +163,9 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
                                 return r;
                         }
 
-                        h2 = jenkins_hash64(b, b_size);
+                        h2 = journal_file_hash_data(f, b, b_size);
                 } else
-                        h2 = jenkins_hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                        h2 = journal_file_hash_data(f, o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
 
                 if (h1 != h2) {
                         error(offset, "Invalid hash (%08"PRIx64" vs. %08"PRIx64, h1, h2);
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
index 5ddca5f93a..515bb82621 100644
--- a/src/journal/sd-journal.c
+++ b/src/journal/sd-journal.c
@@ -279,6 +279,8 @@ _public_ int sd_journal_add_match(sd_journal *j, const void *data, size_t size)
         assert(j->level1->type == MATCH_OR_TERM);
         assert(j->level2->type == MATCH_AND_TERM);
 
+        /* Old-style Jenkins (unkeyed) hashing only here. We do not cover new-style siphash (keyed) hashing
+         * here, since it's different for each file, and thus can't be pre-calculated in the Match object. */
         hash = jenkins_hash64(data, size);
 
         LIST_FOREACH(matches, l3, j->level2->matches) {
@@ -501,9 +503,16 @@ static int next_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                /* If the keyed hash logic is used, we need to calculate the hash fresh per file. Otherwise
+                 * we can use what we pre-calculated. */
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
+
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
@@ -590,9 +599,14 @@ static int find_location_for_match(
         assert(f);
 
         if (m->type == MATCH_DISCRETE) {
-                uint64_t dp;
+                uint64_t dp, hash;
+
+                if (JOURNAL_HEADER_KEYED_HASH(f->header))
+                        hash = journal_file_hash_data(f, m->data, m->size);
+                else
+                        hash = m->hash;
 
-                r = journal_file_find_data_object_with_hash(f, m->data, m->size, m->hash, NULL, &dp);
+                r = journal_file_find_data_object_with_hash(f, m->data, m->size, hash, NULL, &dp);
                 if (r <= 0)
                         return r;
 
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
index 6d97bc5ce8..50aab11c6a 100644
--- a/src/journal/test-journal-stream.c
+++ b/src/journal/test-journal-stream.c
@@ -58,7 +58,7 @@ static void verify_contents(sd_journal *j, unsigned skip) {
                 assert_se(i == N_ENTRIES);
 }
 
-int main(int argc, char *argv[]) {
+static void run_test(void) {
         JournalFile *one, *two, *three;
         char t[] = "/var/tmp/journal-stream-XXXXXX";
         unsigned i;
@@ -68,12 +68,6 @@ int main(int argc, char *argv[]) {
         size_t l;
         dual_timestamp previous_ts = DUAL_TIMESTAMP_NULL;
 
-        /* journal_file_open requires a valid machine id */
-        if (access("/etc/machine-id", F_OK) != 0)
-                return log_tests_skipped("/etc/machine-id not found");
-
-        test_setup_logging(LOG_DEBUG);
-
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
         (void) chattr_path(t, FS_NOCOW_FL, FS_NOCOW_FL, NULL);
@@ -177,6 +171,22 @@ int main(int argc, char *argv[]) {
                 printf("%.*s\n", (int) l, (const char*) data);
 
         assert_se(rm_rf(t, REMOVE_ROOT|REMOVE_PHYSICAL) >= 0);
+}
+
+int main(int argc, char *argv[]) {
+
+        /* journal_file_open requires a valid machine id */
+        if (access("/etc/machine-id", F_OK) != 0)
+                return log_tests_skipped("/etc/machine-id not found");
+
+        test_setup_logging(LOG_DEBUG);
+
+        /* Run this test twice: first with new-style keyed hashing enabled, then with it disabled */
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "1", 1) >= 0);
+        run_test();
+
+        assert_se(setenv("SYSTEMD_JOURNAL_KEYED_HASH", "0", 1) >= 0);
+        run_test();
 
         return 0;
 }