summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Daan De Meyer <daan.j.demeyer@gmail.com> 2021-11-02 20:50:39 +0000
committer: Daan De Meyer <daan.j.demeyer@gmail.com> 2022-10-07 12:20:57 +0200
commit: a9089a6604066a8fa8138af2a6388be48f2a80ef (patch)
tree: d0931d8e65d340d083d1a0df8c546697a767b6e1
parent: 99daf3ce03f4091c74400f895f9c82a1c046e645 (diff)
download: systemd-a9089a6604066a8fa8138af2a6388be48f2a80ef.tar.gz
journal: Use 32-bit entry item object offsets in compact mode
To do this, we move EntryItem out of journal-def.h and turn it into a host-only struct in native-endian byte order, so we can still use it to ship the necessary info around. Aside from that, the changes are pretty simple: we introduce some extra functions to access the right field depending on the mode, and convert all the other code to use those functions instead of accessing the raw fields. We also drop the unused entry item hash field in compact mode. We already stopped doing anything with this field a while ago; now we actually drop it from the format in compact mode.
-rw-r--r-- docs/JOURNAL_FILE_FORMAT.md | 19
-rw-r--r-- src/libsystemd/sd-journal/journal-def.h | 32
-rw-r--r-- src/libsystemd/sd-journal/journal-file.c | 70
-rw-r--r-- src/libsystemd/sd-journal/journal-file.h | 20
-rw-r--r-- src/libsystemd/sd-journal/journal-verify.c | 20
-rw-r--r-- src/libsystemd/sd-journal/sd-journal.c | 8
6 files changed, 107 insertions, 62 deletions
diff --git a/docs/JOURNAL_FILE_FORMAT.md b/docs/JOURNAL_FILE_FORMAT.md
index c4484693af..5f7f97c1b8 100644
--- a/docs/JOURNAL_FILE_FORMAT.md
+++ b/docs/JOURNAL_FILE_FORMAT.md
@@ -461,11 +461,6 @@ field name. It is the head of a singly linked list using DATA's
## Entry Objects
```
-_packed_ struct EntryItem {
- le64_t object_offset;
- le64_t hash;
-};
-
_packed_ struct EntryObject {
ObjectHeader object;
le64_t seqnum;
@@ -473,7 +468,15 @@ _packed_ struct EntryObject {
le64_t monotonic;
sd_id128_t boot_id;
le64_t xor_hash;
- EntryItem items[];
+ union {
+ struct {
+ le64_t object_offset;
+ le64_t hash;
+ } regular[];
+ struct {
+ le32_t object_offset;
+ } compact[];
+ } items;
};
```
@@ -499,6 +502,10 @@ The **items[]** array contains references to all DATA objects of this entry,
plus their respective hashes (which are calculated the same way as in the DATA
objects, i.e. keyed by the file ID).
+If the `HEADER_INCOMPATIBLE_COMPACT` flag is set, DATA object offsets are stored
+as 32-bit integers instead of 64-bit and the unused hash field per data object is
+not stored anymore.
+
In the file ENTRY objects are written ordered monotonically by sequence
number. For continuous parts of the file written during the same boot
(i.e. with the same boot_id) the monotonic timestamp is monotonic too. Modulo
diff --git a/src/libsystemd/sd-journal/journal-def.h b/src/libsystemd/sd-journal/journal-def.h
index c35e438518..f04a2298c4 100644
--- a/src/libsystemd/sd-journal/journal-def.h
+++ b/src/libsystemd/sd-journal/journal-def.h
@@ -24,7 +24,6 @@ typedef struct HashTableObject HashTableObject;
typedef struct EntryArrayObject EntryArrayObject;
typedef struct TagObject TagObject;
-typedef struct EntryItem EntryItem;
typedef struct HashItem HashItem;
typedef struct FSSHeader FSSHeader;
@@ -85,20 +84,23 @@ struct FieldObject FieldObject__contents;
struct FieldObject__packed FieldObject__contents _packed_;
assert_cc(sizeof(struct FieldObject) == sizeof(struct FieldObject__packed));
-struct EntryItem {
- le64_t object_offset;
- le64_t hash;
-} _packed_;
-
-#define EntryObject__contents { \
- ObjectHeader object; \
- le64_t seqnum; \
- le64_t realtime; \
- le64_t monotonic; \
- sd_id128_t boot_id; \
- le64_t xor_hash; \
- EntryItem items[]; \
- }
+#define EntryObject__contents { \
+ ObjectHeader object; \
+ le64_t seqnum; \
+ le64_t realtime; \
+ le64_t monotonic; \
+ sd_id128_t boot_id; \
+ le64_t xor_hash; \
+ union { \
+ struct { \
+ le64_t object_offset; \
+ le64_t hash; \
+ } regular[0]; \
+ struct { \
+ le32_t object_offset; \
+ } compact[0]; \
+ } items; \
+}
struct EntryObject EntryObject__contents;
struct EntryObject__packed EntryObject__contents _packed_;
diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c
index d9aa9a3806..33a285faa4 100644
--- a/src/libsystemd/sd-journal/journal-file.c
+++ b/src/libsystemd/sd-journal/journal-file.c
@@ -771,17 +771,17 @@ static int check_object(JournalFile *f, Object *o, uint64_t offset) {
sz = le64toh(READ_NOW(o->object.size));
if (sz < offsetof(Object, entry.items) ||
- (sz - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0)
+ (sz - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(Object, entry.items),
sz,
offset);
- if ((sz - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0)
+ if ((sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid number items in entry: %" PRIu64 ": %" PRIu64,
- (sz - offsetof(Object, entry.items)) / sizeof(EntryItem),
+ (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f),
offset);
if (le64toh(o->entry.seqnum) <= 0)
@@ -1658,8 +1658,10 @@ static int journal_file_append_data(
return 0;
}
-uint64_t journal_file_entry_n_items(Object *o) {
+uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) {
uint64_t sz;
+
+ assert(f);
assert(o);
if (o->object.type != OBJECT_ENTRY)
@@ -1669,7 +1671,7 @@ uint64_t journal_file_entry_n_items(Object *o) {
if (sz < offsetof(Object, entry.items))
return 0;
- return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
+ return (sz - offsetof(Object, entry.items)) / journal_file_entry_item_size(f);
}
uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) {
@@ -1820,15 +1822,13 @@ static int link_entry_into_array_plus_one(JournalFile *f,
return 0;
}
-static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
- uint64_t p;
+static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t p) {
int r;
assert(f);
assert(o);
assert(offset > 0);
- p = le64toh(o->entry.items[i].object_offset);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
@@ -1840,8 +1840,13 @@ static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offs
offset);
}
-static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
- uint64_t n;
+static int journal_file_link_entry(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ const EntryItem items[],
+ size_t n_items) {
+
int r;
assert(f);
@@ -1871,15 +1876,14 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
f->header->tail_entry_monotonic = o->entry.monotonic;
/* Link up the items */
- n = journal_file_entry_n_items(o);
- for (uint64_t i = 0; i < n; i++) {
+ for (uint64_t i = 0; i < n_items; i++) {
int k;
/* If we fail to link an entry item because we can't allocate a new entry array, don't fail
* immediately but try to link the other entry items since it might still be possible to link
* those if they don't require a new entry array to be allocated. */
- k = journal_file_link_entry_item(f, o, offset, i);
+ k = journal_file_link_entry_item(f, o, offset, items[i].object_offset);
if (k == -E2BIG)
r = k;
else if (k < 0)
@@ -1889,12 +1893,26 @@ static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
return r;
}
+static void write_entry_item(JournalFile *f, Object *o, uint64_t i, const EntryItem *item) {
+ assert(f);
+ assert(o);
+ assert(item);
+
+ if (JOURNAL_HEADER_COMPACT(f->header)) {
+ assert(item->object_offset <= UINT32_MAX);
+ o->entry.items.compact[i].object_offset = htole32(item->object_offset);
+ } else {
+ o->entry.items.regular[i].object_offset = htole64(item->object_offset);
+ o->entry.items.regular[i].hash = htole64(item->hash);
+ }
+}
+
static int journal_file_append_entry_internal(
JournalFile *f,
const dual_timestamp *ts,
const sd_id128_t *boot_id,
uint64_t xor_hash,
- const EntryItem items[], unsigned n_items,
+ const EntryItem items[], size_t n_items,
uint64_t *seqnum,
Object **ret, uint64_t *ret_offset) {
uint64_t np;
@@ -1907,14 +1925,13 @@ static int journal_file_append_entry_internal(
assert(items || n_items == 0);
assert(ts);
- osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
+ osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
if (r < 0)
return r;
o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
- memcpy_safe(o->entry.items, items, n_items * sizeof(EntryItem));
o->entry.realtime = htole64(ts->realtime);
o->entry.monotonic = htole64(ts->monotonic);
o->entry.xor_hash = htole64(xor_hash);
@@ -1922,13 +1939,16 @@ static int journal_file_append_entry_internal(
f->header->boot_id = *boot_id;
o->entry.boot_id = f->header->boot_id;
+ for (size_t i = 0; i < n_items; i++)
+ write_entry_item(f, o, i, &items[i]);
+
#if HAVE_GCRYPT
r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
if (r < 0)
return r;
#endif
- r = journal_file_link_entry(f, o, np);
+ r = journal_file_link_entry(f, o, np, items, n_items);
if (r < 0)
return r;
@@ -2031,12 +2051,10 @@ int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t)
}
static int entry_item_cmp(const EntryItem *a, const EntryItem *b) {
- return CMP(le64toh(a->object_offset), le64toh(b->object_offset));
+ return CMP(a->object_offset, b->object_offset);
}
static size_t remove_duplicate_entry_items(EntryItem items[], size_t n) {
-
- /* This function relies on the items array being sorted. */
size_t j = 1;
if (n <= 1)
@@ -2111,8 +2129,8 @@ int journal_file_append_entry(
xor_hash ^= le64toh(o->data.hash);
items[i] = (EntryItem) {
- .object_offset = htole64(p),
- .hash = o->data.hash,
+ .object_offset = p,
+ .hash = le64toh(o->data.hash),
};
}
@@ -3785,7 +3803,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
};
boot_id = &o->entry.boot_id;
- n = journal_file_entry_n_items(o);
+ n = journal_file_entry_n_items(from, o);
items = newa(EntryItem, n);
for (uint64_t i = 0; i < n; i++) {
@@ -3795,7 +3813,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
void *data;
Object *u;
- q = le64toh(o->entry.items[i].object_offset);
+ q = journal_file_entry_item_object_offset(from, o, i);
r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
if (r < 0)
@@ -3848,8 +3866,8 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
xor_hash ^= le64toh(u->data.hash);
items[i] = (EntryItem) {
- .object_offset = htole64(h),
- .hash = u->data.hash,
+ .object_offset = h,
+ .hash = le64toh(u->data.hash),
};
r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h
index 9b5bd1ff36..086b440888 100644
--- a/src/libsystemd/sd-journal/journal-file.h
+++ b/src/libsystemd/sd-journal/journal-file.h
@@ -127,6 +127,11 @@ typedef enum JournalFileFlags {
JOURNAL_SEAL = 1 << 1,
} JournalFileFlags;
+typedef struct {
+ uint64_t object_offset;
+ uint64_t hash;
+} EntryItem;
+
int journal_file_open(
int fd,
const char *fname,
@@ -193,7 +198,20 @@ int journal_file_read_object_header(JournalFile *f, ObjectType type, uint64_t of
int journal_file_tail_end_by_pread(JournalFile *f, uint64_t *ret_offset);
int journal_file_tail_end_by_mmap(JournalFile *f, uint64_t *ret_offset);
-uint64_t journal_file_entry_n_items(Object *o) _pure_;
+static inline uint64_t journal_file_entry_item_object_offset(JournalFile *f, Object *o, size_t i) {
+ assert(f);
+ assert(o);
+ return JOURNAL_HEADER_COMPACT(f->header) ? le32toh(o->entry.items.compact[i].object_offset) :
+ le64toh(o->entry.items.regular[i].object_offset);
+}
+
+static inline size_t journal_file_entry_item_size(JournalFile *f) {
+ assert(f);
+ return JOURNAL_HEADER_COMPACT(f->header) ? sizeof_field(Object, entry.items.compact[0]) :
+ sizeof_field(Object, entry.items.regular[0]);
+}
+
+uint64_t journal_file_entry_n_items(JournalFile *f, Object *o) _pure_;
uint64_t journal_file_entry_array_n_items(JournalFile *f, Object *o) _pure_;
static inline uint64_t journal_file_entry_array_item(JournalFile *f, Object *o, size_t i) {
diff --git a/src/libsystemd/sd-journal/journal-verify.c b/src/libsystemd/sd-journal/journal-verify.c
index d0da9bf806..37d2a656b2 100644
--- a/src/libsystemd/sd-journal/journal-verify.c
+++ b/src/libsystemd/sd-journal/journal-verify.c
@@ -240,7 +240,7 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
}
case OBJECT_ENTRY:
- if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % sizeof(EntryItem) != 0) {
+ if ((le64toh(o->object.size) - offsetof(Object, entry.items)) % journal_file_entry_item_size(f) != 0) {
error(offset,
"Bad entry size (<= %zu): %"PRIu64,
offsetof(Object, entry.items),
@@ -248,10 +248,10 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
return -EBADMSG;
}
- if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem) <= 0) {
+ if ((le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f) <= 0) {
error(offset,
"Invalid number items in entry: %"PRIu64,
- (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem));
+ (le64toh(o->object.size) - offsetof(Object, entry.items)) / journal_file_entry_item_size(f));
return -EBADMSG;
}
@@ -276,13 +276,13 @@ static int journal_file_object_verify(JournalFile *f, uint64_t offset, Object *o
return -EBADMSG;
}
- for (uint64_t i = 0; i < journal_file_entry_n_items(o); i++) {
- if (le64toh(o->entry.items[i].object_offset) == 0 ||
- !VALID64(le64toh(o->entry.items[i].object_offset))) {
+ for (uint64_t i = 0; i < journal_file_entry_n_items(f, o); i++) {
+ if (journal_file_entry_item_object_offset(f, o, i) == 0 ||
+ !VALID64(journal_file_entry_item_object_offset(f, o, i))) {
error(offset,
"Invalid entry item (%"PRIu64"/%"PRIu64") offset: "OFSfmt,
- i, journal_file_entry_n_items(o),
- le64toh(o->entry.items[i].object_offset));
+ i, journal_file_entry_n_items(f, o),
+ journal_file_entry_item_object_offset(f, o, i));
return -EBADMSG;
}
}
@@ -646,12 +646,12 @@ static int verify_entry(
assert(o);
assert(cache_data_fd);
- n = journal_file_entry_n_items(o);
+ n = journal_file_entry_n_items(f, o);
for (i = 0; i < n; i++) {
uint64_t q;
Object *u;
- q = le64toh(o->entry.items[i].object_offset);
+ q = journal_file_entry_item_object_offset(f, o, i);
if (!contains_uint64(cache_data_fd, n_data, q)) {
error(p, "Invalid data object of entry");
diff --git a/src/libsystemd/sd-journal/sd-journal.c b/src/libsystemd/sd-journal/sd-journal.c
index af11f33505..8c94f02f3e 100644
--- a/src/libsystemd/sd-journal/sd-journal.c
+++ b/src/libsystemd/sd-journal/sd-journal.c
@@ -2287,14 +2287,14 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
field_length = strlen(field);
- uint64_t n = journal_file_entry_n_items(o);
+ uint64_t n = journal_file_entry_n_items(f, o);
for (uint64_t i = 0; i < n; i++) {
Object *d;
uint64_t p, l;
size_t t;
Compression c;
- p = le64toh(o->entry.items[i].object_offset);
+ p = journal_file_entry_item_object_offset(f, o, i);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &d);
if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", i);
@@ -2435,10 +2435,10 @@ _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t
if (r < 0)
return r;
- for (uint64_t n = journal_file_entry_n_items(o); j->current_field < n; j->current_field++) {
+ for (uint64_t n = journal_file_entry_n_items(f, o); j->current_field < n; j->current_field++) {
uint64_t p;
- p = le64toh(o->entry.items[j->current_field].object_offset);
+ p = journal_file_entry_item_object_offset(f, o, j->current_field);
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (IN_SET(r, -EADDRNOTAVAIL, -EBADMSG)) {
log_debug_errno(r, "Entry item %"PRIu64" data object is bad, skipping over it: %m", j->current_field);