summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2023-01-23 21:21:21 +0100
committerLennart Poettering <lennart@poettering.net>2023-02-08 13:42:29 +0100
commite5d60d1b3b0c0b229571823c7fdd390a9562982e (patch)
treef46db05768fb62e658f91562088ea01d438e6018 /src
parent2bc70e2e9db13cfbc74e4bf1fd28ecbfef1466c9 (diff)
downloadsystemd-e5d60d1b3b0c0b229571823c7fdd390a9562982e.tar.gz
journald: maintain entry seqnum counter in mmap()ed file in /run/
Let's ensure that entry seqnums remain stable and monotonic across the entire runtime of the system, even if local storage is turned off. Let's do this by maintainer a counter file in /run/ which we mmap() and wherein we maintain the counter from early-boot on till late shutdown. This takes inspiration of the kernel-seqnum file we already maintain like that that tracks which kmsg messages we already processed. In fact, we reuse the same code for maintaining it. This should allow the behaviour entry seqnums to be more predictable, in particular when journal local storage is turned off. Previously, we'd maintain the seqnum simply by always bumping it to the maximum of the last written entry seqnum plus one, and the biggest seqnum so far written to the journal file on disk. If we'd never write a file on disk, or if no journal file was existing during the initrd→seqnum transition we'd completely lose the current seqnum position during daemon restarts (such as the one happening during the switch-root operation). This also will cause a journal file rotation whenever we try to write to a journal file with multiple sequence number IDs, so that we know that from early boot trhough the entire runtime we'll have stable sequence numbers that do not jump, and thus can be used to determine "lost" messages.
Diffstat (limited to 'src')
-rw-r--r--src/journal-remote/journal-remote-write.c25
-rw-r--r--src/journal/journald-kmsg.c26
-rw-r--r--src/journal/journald-server.c105
-rw-r--r--src/journal/journald-server.h12
-rw-r--r--src/journal/test-journal-flush.c2
-rw-r--r--src/journal/test-journal-interleaving.c2
-rw-r--r--src/journal/test-journal-stream.c6
-rw-r--r--src/journal/test-journal-verify.c2
-rw-r--r--src/journal/test-journal.c8
-rw-r--r--src/libsystemd/sd-journal/journal-file.c30
-rw-r--r--src/libsystemd/sd-journal/journal-file.h5
11 files changed, 168 insertions, 55 deletions
diff --git a/src/journal-remote/journal-remote-write.c b/src/journal-remote/journal-remote-write.c
index f6c3d83732..ace4a1cfad 100644
--- a/src/journal-remote/journal-remote-write.c
+++ b/src/journal-remote/journal-remote-write.c
@@ -99,9 +99,16 @@ int writer_write(Writer *w,
return r;
}
- r = journal_file_append_entry(w->journal->file, ts, boot_id,
- iovw->iovec, iovw->count,
- &w->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ w->journal->file,
+ ts,
+ boot_id,
+ iovw->iovec,
+ iovw->count,
+ &w->seqnum,
+ /* seqnum_id= */ NULL,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r >= 0) {
if (w->server)
w->server->event_count += 1;
@@ -120,9 +127,15 @@ int writer_write(Writer *w,
return r;
log_debug("Retrying write.");
- r = journal_file_append_entry(w->journal->file, ts, boot_id,
- iovw->iovec, iovw->count,
- &w->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ w->journal->file,
+ ts,
+ boot_id,
+ iovw->iovec, iovw->count,
+ &w->seqnum,
+ /* seqnum_id= */ NULL,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r < 0)
return r;
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
index c64e626b2c..83082c4e94 100644
--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@@ -423,9 +423,6 @@ finish:
}
int server_open_kernel_seqnum(Server *s) {
- _cleanup_close_ int fd = -EBADF;
- const char *fn;
- uint64_t *p;
int r;
assert(s);
@@ -436,26 +433,9 @@ int server_open_kernel_seqnum(Server *s) {
if (!s->dev_kmsg_readable)
return 0;
- fn = strjoina(s->runtime_directory, "/kernel-seqnum");
- fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
- if (fd < 0) {
- log_error_errno(errno, "Failed to open %s, ignoring: %m", fn);
- return 0;
- }
-
- r = posix_fallocate_loop(fd, 0, sizeof(uint64_t));
- if (r < 0) {
- log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
- return 0;
- }
-
- p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if (p == MAP_FAILED) {
- log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
- return 0;
- }
-
- s->kernel_seqnum = p;
+ r = server_map_seqnum_file(s, "kernel-seqnum", sizeof(uint64_t), (void**) &s->kernel_seqnum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map kernel seqnum file: %m");
return 0;
}
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 022e12d83d..f88365e268 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -889,10 +889,12 @@ static bool shall_try_append_again(JournalFile *f, int r) {
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Montonic clock jumped backwards relative to last journal entry, rotating.", f->path);
return true;
+ case -EILSEQ: /* seqnum ID last used in the file doesn't match the one we'd passed when writing an entry to it */
+ log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file uses a different sequence number ID, rotating.", f->path);
+ return true;
+
case -EAFNOSUPPORT:
- log_ratelimit_warning(JOURNAL_LOG_RATELIMIT,
- "%s: underlying file system does not support memory mapping or another required file system feature.",
- f->path);
+ log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: underlying file system does not support memory mapping or another required file system feature.", f->path);
return false;
default:
@@ -900,7 +902,13 @@ static bool shall_try_append_again(JournalFile *f, int r) {
}
}
-static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
+static void server_write_to_journal(
+ Server *s,
+ uid_t uid,
+ const struct iovec *iovec,
+ size_t n,
+ int priority) {
+
bool vacuumed = false, rotate = false;
struct dual_timestamp ts;
ManagedJournalFile *f;
@@ -950,7 +958,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
s->last_realtime_clock = ts.realtime;
- r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ f->file,
+ &ts,
+ /* boot_id= */ NULL,
+ iovec, n,
+ &s->seqnum->seqnum,
+ &s->seqnum->id,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r >= 0) {
server_schedule_sync(s, priority);
return;
@@ -978,7 +994,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
return;
log_debug_errno(r, "Retrying write.");
- r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ f->file,
+ &ts,
+ /* boot_id= */ NULL,
+ iovec, n,
+ &s->seqnum->seqnum,
+ &s->seqnum->id,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r < 0)
log_ratelimit_error_errno(r, FAILED_TO_WRITE_ENTRY_RATELIMIT,
"Failed to write entry to %s (%zu items, %zu bytes) despite vacuuming, ignoring: %m",
@@ -1290,7 +1314,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
goto finish;
}
- r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(
+ f,
+ s->system_journal->file,
+ o,
+ f->current_offset,
+ &s->seqnum->seqnum,
+ &s->seqnum->id);
if (r >= 0)
continue;
@@ -1312,7 +1342,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
}
log_debug("Retrying write.");
- r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(
+ f,
+ s->system_journal->file,
+ o,
+ f->current_offset,
+ &s->seqnum->seqnum,
+ &s->seqnum->id);
if (r < 0) {
log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't write entry: %m");
goto finish;
@@ -2250,6 +2286,51 @@ static int server_open_varlink(Server *s, const char *socket, int fd) {
return 0;
}
+int server_map_seqnum_file(
+ Server *s,
+ const char *fname,
+ size_t size,
+ void **ret) {
+
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ uint64_t *p;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(size > 0);
+ assert(ret);
+
+ fn = path_join(s->runtime_directory, fname);
+ if (!fn)
+ return -ENOMEM;
+
+ fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0)
+ return -errno;
+
+ r = posix_fallocate_loop(fd, 0, size);
+ if (r < 0)
+ return r;
+
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ *ret = p;
+ return 0;
+}
+
+void server_unmap_seqnum_file(void *p, size_t size) {
+ assert(size > 0);
+
+ if (!p)
+ return;
+
+ assert_se(munmap(p, size) >= 0);
+}
+
static bool server_is_idle(Server *s) {
assert(s);
@@ -2562,6 +2643,10 @@ int server_init(Server *s, const char *namespace) {
if (r < 0)
return r;
+ r = server_map_seqnum_file(s, "seqnum", sizeof(SeqnumData), (void**) &s->seqnum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map main seqnum file: %m");
+
r = server_open_kernel_seqnum(s);
if (r < 0)
return r;
@@ -2678,8 +2763,8 @@ void server_done(Server *s) {
if (s->ratelimit)
journal_ratelimit_free(s->ratelimit);
- if (s->kernel_seqnum)
- munmap(s->kernel_seqnum, sizeof(uint64_t));
+ server_unmap_seqnum_file(s->seqnum, sizeof(*s->seqnum));
+ server_unmap_seqnum_file(s->kernel_seqnum, sizeof(*s->kernel_seqnum));
free(s->buffer);
free(s->tty_path);
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index ee8f374190..e7bf750a59 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -60,6 +60,13 @@ typedef struct JournalStorage {
JournalStorageSpace space;
} JournalStorage;
+/* This structure will be kept in $RUNTIME_DIRECTORY/seqnum and is mapped by journald, and is used to
+ * maintain the sequence number counter with its seqnum ID */
+typedef struct SeqnumData {
+ sd_id128_t id;
+ uint64_t seqnum;
+} SeqnumData;
+
struct Server {
char *namespace;
@@ -93,7 +100,7 @@ struct Server {
ManagedJournalFile *system_journal;
OrderedHashmap *user_journals;
- uint64_t seqnum;
+ SeqnumData *seqnum;
char *buffer;
@@ -227,3 +234,6 @@ void server_space_usage_message(Server *s, JournalStorage *storage);
int server_start_or_stop_idle_timer(Server *s);
int server_refresh_idle_timer(Server *s);
+
+int server_map_seqnum_file(Server *s, const char *fname, size_t size, void **ret);
+void server_unmap_seqnum_file(void *p, size_t size);
diff --git a/src/journal/test-journal-flush.c b/src/journal/test-journal-flush.c
index 53e479909e..f2fe0e51c0 100644
--- a/src/journal/test-journal-flush.c
+++ b/src/journal/test-journal-flush.c
@@ -53,7 +53,7 @@ static void test_journal_flush(int argc, char *argv[]) {
log_error_errno(r, "journal_file_move_to_object failed: %m");
assert_se(r >= 0);
- r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset, NULL, NULL);
if (r < 0)
log_warning_errno(r, "journal_file_copy_entry failed: %m");
assert_se(r >= 0 ||
diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c
index 67552de8fe..55d717da31 100644
--- a/src/journal/test-journal-interleaving.c
+++ b/src/journal/test-journal-interleaving.c
@@ -66,7 +66,7 @@ static void append_number(ManagedJournalFile *f, int n, uint64_t *seqnum) {
assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
iovec[0] = IOVEC_MAKE_STRING(p);
- assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL));
+ assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL, NULL));
free(p);
}
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
index 9f4494c6c0..940625a084 100644
--- a/src/journal/test-journal-stream.c
+++ b/src/journal/test-journal-stream.c
@@ -103,12 +103,12 @@ static void run_test(void) {
iovec[1] = IOVEC_MAKE(q, strlen(q));
if (i % 10 == 0)
- assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
else {
if (i % 3 == 0)
- assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
- assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
}
free(p);
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
index 0c58d05ced..7a90079fc8 100644
--- a/src/journal/test-journal-verify.c
+++ b/src/journal/test-journal-verify.c
@@ -94,7 +94,7 @@ static int run_test(int argc, char *argv[]) {
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
free(test);
}
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
index 889673cae7..cd295b2a7e 100644
--- a/src/journal/test-journal.c
+++ b/src/journal/test-journal.c
@@ -45,13 +45,13 @@ static void test_non_empty_one(void) {
assert_se(sd_id128_randomize(&fake_boot_id) == 0);
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test2);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);
@@ -199,7 +199,7 @@ static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
dual_timestamp_get(&ts);
iovec = IOVEC_MAKE(data, data_size);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);
diff --git a/src/libsystemd/sd-journal/journal-file.c b/src/libsystemd/sd-journal/journal-file.c
index 2ead29548b..aab33dbfcc 100644
--- a/src/libsystemd/sd-journal/journal-file.c
+++ b/src/libsystemd/sd-journal/journal-file.c
@@ -2094,6 +2094,7 @@ static int journal_file_append_entry_internal(
const EntryItem items[],
size_t n_items,
uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset) {
@@ -2139,6 +2140,21 @@ static int journal_file_append_entry_internal(
}
}
+ if (seqnum_id) {
+ /* Settle the passed in sequence number ID */
+
+ if (sd_id128_is_null(*seqnum_id))
+ *seqnum_id = f->header->seqnum_id; /* Caller has none assigned, then copy the one from the file */
+ else if (!sd_id128_equal(*seqnum_id, f->header->seqnum_id)) {
+ /* Different seqnum IDs? We can't allow entries from multiple IDs end up in the same journal.*/
+ if (le64toh(f->header->n_entries) == 0)
+ f->header->seqnum_id = *seqnum_id; /* Caller has one, and file so far has no entries, then copy the one from the caller */
+ else
+ return log_debug_errno(SYNTHETIC_ERRNO(EILSEQ),
+ "Sequence number IDs don't match, refusing entry.");
+ }
+ }
+
osize = offsetof(Object, entry.items) + (n_items * journal_file_entry_item_size(f));
r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
@@ -2290,6 +2306,7 @@ int journal_file_append_entry(
const struct iovec iovec[],
size_t n_iovec,
uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset) {
@@ -2376,7 +2393,7 @@ int journal_file_append_entry(
typesafe_qsort(items, n_iovec, entry_item_cmp);
n_iovec = remove_duplicate_entry_items(items, n_iovec);
- r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, ret_object, ret_offset);
+ r = journal_file_append_entry_internal(f, ts, boot_id, xor_hash, items, n_iovec, seqnum, seqnum_id, ret_object, ret_offset);
/* If the memory mapping triggered a SIGBUS then we return an
* IO error and ignore the error code passed down to us, since
@@ -4093,7 +4110,14 @@ int journal_file_dispose(int dir_fd, const char *fname) {
return 0;
}
-int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p) {
+int journal_file_copy_entry(
+ JournalFile *from,
+ JournalFile *to,
+ Object *o,
+ uint64_t p,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id) {
+
_cleanup_free_ EntryItem *items_alloc = NULL;
EntryItem *items;
uint64_t q, n, xor_hash = 0;
@@ -4168,7 +4192,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
return r;
}
- r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, NULL, NULL, NULL);
+ r = journal_file_append_entry_internal(to, &ts, boot_id, xor_hash, items, n, seqnum, seqnum_id, NULL, NULL);
if (mmap_cache_fd_got_sigbus(to->cache_fd))
return -EIO;
diff --git a/src/libsystemd/sd-journal/journal-file.h b/src/libsystemd/sd-journal/journal-file.h
index 8c809ed4b9..07f1f5d180 100644
--- a/src/libsystemd/sd-journal/journal-file.h
+++ b/src/libsystemd/sd-journal/journal-file.h
@@ -259,7 +259,8 @@ int journal_file_append_entry(
const sd_id128_t *boot_id,
const struct iovec iovec[],
size_t n_iovec,
- uint64_t *seqno,
+ uint64_t *seqnum,
+ sd_id128_t *seqnum_id,
Object **ret_object,
uint64_t *ret_offset);
@@ -286,7 +287,7 @@ int journal_file_move_to_entry_by_seqnum_for_data(JournalFile *f, Object *d, uin
int journal_file_move_to_entry_by_realtime_for_data(JournalFile *f, Object *d, uint64_t realtime, direction_t direction, Object **ret_object, uint64_t *ret_offset);
int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, Object *d, sd_id128_t boot_id, uint64_t monotonic, direction_t direction, Object **ret_object, uint64_t *ret_offset);
-int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p);
+int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, sd_id128_t *seqnum_id);
void journal_file_dump(JournalFile *f);
void journal_file_print_header(JournalFile *f);