summary | refs | log | tree | commit | diff
path: root/src/journal
diff options
context:
space:
mode:
author: Lennart Poettering <lennart@poettering.net> 2023-01-23 21:21:21 +0100
committer: Lennart Poettering <lennart@poettering.net> 2023-02-08 13:42:29 +0100
commit: e5d60d1b3b0c0b229571823c7fdd390a9562982e (patch)
tree: f46db05768fb62e658f91562088ea01d438e6018 /src/journal
parent: 2bc70e2e9db13cfbc74e4bf1fd28ecbfef1466c9 (diff)
download: systemd-e5d60d1b3b0c0b229571823c7fdd390a9562982e.tar.gz
journald: maintain entry seqnum counter in mmap()ed file in /run/
Let's ensure that entry seqnums remain stable and monotonic across the entire runtime of the system, even if local storage is turned off. Let's do this by maintaining a counter file in /run/ which we mmap() and wherein we maintain the counter from early boot on until late shutdown. This takes inspiration from the kernel-seqnum file we already maintain in the same way, which tracks which kmsg messages we already processed. In fact, we reuse the same code for maintaining it. This should allow the behaviour of entry seqnums to be more predictable, in particular when journal local storage is turned off. Previously, we'd maintain the seqnum simply by always bumping it to the maximum of the last written entry seqnum plus one, and the biggest seqnum so far written to the journal file on disk. If we'd never write a file on disk, or if no journal file existed during the initrd→host transition, we'd completely lose the current seqnum position during daemon restarts (such as the one happening during the switch-root operation). This also will cause a journal file rotation whenever we try to write to a journal file that would otherwise end up with multiple sequence number IDs, so that we know that from early boot through the entire runtime we'll have stable sequence numbers that do not jump, and thus can be used to determine "lost" messages.
Diffstat (limited to 'src/journal')
-rw-r--r-- src/journal/journald-kmsg.c 26
-rw-r--r-- src/journal/journald-server.c 105
-rw-r--r-- src/journal/journald-server.h 12
-rw-r--r-- src/journal/test-journal-flush.c 2
-rw-r--r-- src/journal/test-journal-interleaving.c 2
-rw-r--r-- src/journal/test-journal-stream.c 6
-rw-r--r-- src/journal/test-journal-verify.c 2
-rw-r--r-- src/journal/test-journal.c 8
8 files changed, 119 insertions, 44 deletions
diff --git a/src/journal/journald-kmsg.c b/src/journal/journald-kmsg.c
index c64e626b2c..83082c4e94 100644
--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@@ -423,9 +423,6 @@ finish:
}
int server_open_kernel_seqnum(Server *s) {
- _cleanup_close_ int fd = -EBADF;
- const char *fn;
- uint64_t *p;
int r;
assert(s);
@@ -436,26 +433,9 @@ int server_open_kernel_seqnum(Server *s) {
if (!s->dev_kmsg_readable)
return 0;
- fn = strjoina(s->runtime_directory, "/kernel-seqnum");
- fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
- if (fd < 0) {
- log_error_errno(errno, "Failed to open %s, ignoring: %m", fn);
- return 0;
- }
-
- r = posix_fallocate_loop(fd, 0, sizeof(uint64_t));
- if (r < 0) {
- log_error_errno(r, "Failed to allocate sequential number file, ignoring: %m");
- return 0;
- }
-
- p = mmap(NULL, sizeof(uint64_t), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if (p == MAP_FAILED) {
- log_error_errno(errno, "Failed to map sequential number file, ignoring: %m");
- return 0;
- }
-
- s->kernel_seqnum = p;
+ r = server_map_seqnum_file(s, "kernel-seqnum", sizeof(uint64_t), (void**) &s->kernel_seqnum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map kernel seqnum file: %m");
return 0;
}
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
index 022e12d83d..f88365e268 100644
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@@ -889,10 +889,12 @@ static bool shall_try_append_again(JournalFile *f, int r) {
log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Montonic clock jumped backwards relative to last journal entry, rotating.", f->path);
return true;
+ case -EILSEQ: /* seqnum ID last used in the file doesn't match the one we'd passed when writing an entry to it */
+ log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: Journal file uses a different sequence number ID, rotating.", f->path);
+ return true;
+
case -EAFNOSUPPORT:
- log_ratelimit_warning(JOURNAL_LOG_RATELIMIT,
- "%s: underlying file system does not support memory mapping or another required file system feature.",
- f->path);
+ log_ratelimit_warning(JOURNAL_LOG_RATELIMIT, "%s: underlying file system does not support memory mapping or another required file system feature.", f->path);
return false;
default:
@@ -900,7 +902,13 @@ static bool shall_try_append_again(JournalFile *f, int r) {
}
}
-static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
+static void server_write_to_journal(
+ Server *s,
+ uid_t uid,
+ const struct iovec *iovec,
+ size_t n,
+ int priority) {
+
bool vacuumed = false, rotate = false;
struct dual_timestamp ts;
ManagedJournalFile *f;
@@ -950,7 +958,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
s->last_realtime_clock = ts.realtime;
- r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ f->file,
+ &ts,
+ /* boot_id= */ NULL,
+ iovec, n,
+ &s->seqnum->seqnum,
+ &s->seqnum->id,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r >= 0) {
server_schedule_sync(s, priority);
return;
@@ -978,7 +994,15 @@ static void server_write_to_journal(Server *s, uid_t uid, struct iovec *iovec, s
return;
log_debug_errno(r, "Retrying write.");
- r = journal_file_append_entry(f->file, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
+ r = journal_file_append_entry(
+ f->file,
+ &ts,
+ /* boot_id= */ NULL,
+ iovec, n,
+ &s->seqnum->seqnum,
+ &s->seqnum->id,
+ /* ret_object= */ NULL,
+ /* ret_offset= */ NULL);
if (r < 0)
log_ratelimit_error_errno(r, FAILED_TO_WRITE_ENTRY_RATELIMIT,
"Failed to write entry to %s (%zu items, %zu bytes) despite vacuuming, ignoring: %m",
@@ -1290,7 +1314,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
goto finish;
}
- r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(
+ f,
+ s->system_journal->file,
+ o,
+ f->current_offset,
+ &s->seqnum->seqnum,
+ &s->seqnum->id);
if (r >= 0)
continue;
@@ -1312,7 +1342,13 @@ int server_flush_to_var(Server *s, bool require_flag_file) {
}
log_debug("Retrying write.");
- r = journal_file_copy_entry(f, s->system_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(
+ f,
+ s->system_journal->file,
+ o,
+ f->current_offset,
+ &s->seqnum->seqnum,
+ &s->seqnum->id);
if (r < 0) {
log_ratelimit_error_errno(r, JOURNAL_LOG_RATELIMIT, "Can't write entry: %m");
goto finish;
@@ -2250,6 +2286,51 @@ static int server_open_varlink(Server *s, const char *socket, int fd) {
return 0;
}
+int server_map_seqnum_file(
+ Server *s,
+ const char *fname,
+ size_t size,
+ void **ret) {
+
+ _cleanup_free_ char *fn = NULL;
+ _cleanup_close_ int fd = -EBADF;
+ uint64_t *p;
+ int r;
+
+ assert(s);
+ assert(fname);
+ assert(size > 0);
+ assert(ret);
+
+ fn = path_join(s->runtime_directory, fname);
+ if (!fn)
+ return -ENOMEM;
+
+ fd = open(fn, O_RDWR|O_CREAT|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
+ if (fd < 0)
+ return -errno;
+
+ r = posix_fallocate_loop(fd, 0, size);
+ if (r < 0)
+ return r;
+
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == MAP_FAILED)
+ return -errno;
+
+ *ret = p;
+ return 0;
+}
+
+void server_unmap_seqnum_file(void *p, size_t size) {
+ assert(size > 0);
+
+ if (!p)
+ return;
+
+ assert_se(munmap(p, size) >= 0);
+}
+
static bool server_is_idle(Server *s) {
assert(s);
@@ -2562,6 +2643,10 @@ int server_init(Server *s, const char *namespace) {
if (r < 0)
return r;
+ r = server_map_seqnum_file(s, "seqnum", sizeof(SeqnumData), (void**) &s->seqnum);
+ if (r < 0)
+ return log_error_errno(r, "Failed to map main seqnum file: %m");
+
r = server_open_kernel_seqnum(s);
if (r < 0)
return r;
@@ -2678,8 +2763,8 @@ void server_done(Server *s) {
if (s->ratelimit)
journal_ratelimit_free(s->ratelimit);
- if (s->kernel_seqnum)
- munmap(s->kernel_seqnum, sizeof(uint64_t));
+ server_unmap_seqnum_file(s->seqnum, sizeof(*s->seqnum));
+ server_unmap_seqnum_file(s->kernel_seqnum, sizeof(*s->kernel_seqnum));
free(s->buffer);
free(s->tty_path);
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index ee8f374190..e7bf750a59 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -60,6 +60,13 @@ typedef struct JournalStorage {
JournalStorageSpace space;
} JournalStorage;
+/* This structure will be kept in $RUNTIME_DIRECTORY/seqnum and is mapped by journald, and is used to
+ * maintain the sequence number counter with its seqnum ID */
+typedef struct SeqnumData {
+ sd_id128_t id;
+ uint64_t seqnum;
+} SeqnumData;
+
struct Server {
char *namespace;
@@ -93,7 +100,7 @@ struct Server {
ManagedJournalFile *system_journal;
OrderedHashmap *user_journals;
- uint64_t seqnum;
+ SeqnumData *seqnum;
char *buffer;
@@ -227,3 +234,6 @@ void server_space_usage_message(Server *s, JournalStorage *storage);
int server_start_or_stop_idle_timer(Server *s);
int server_refresh_idle_timer(Server *s);
+
+int server_map_seqnum_file(Server *s, const char *fname, size_t size, void **ret);
+void server_unmap_seqnum_file(void *p, size_t size);
diff --git a/src/journal/test-journal-flush.c b/src/journal/test-journal-flush.c
index 53e479909e..f2fe0e51c0 100644
--- a/src/journal/test-journal-flush.c
+++ b/src/journal/test-journal-flush.c
@@ -53,7 +53,7 @@ static void test_journal_flush(int argc, char *argv[]) {
log_error_errno(r, "journal_file_move_to_object failed: %m");
assert_se(r >= 0);
- r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset);
+ r = journal_file_copy_entry(f, new_journal->file, o, f->current_offset, NULL, NULL);
if (r < 0)
log_warning_errno(r, "journal_file_copy_entry failed: %m");
assert_se(r >= 0 ||
diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c
index 67552de8fe..55d717da31 100644
--- a/src/journal/test-journal-interleaving.c
+++ b/src/journal/test-journal-interleaving.c
@@ -66,7 +66,7 @@ static void append_number(ManagedJournalFile *f, int n, uint64_t *seqnum) {
assert_se(asprintf(&p, "NUMBER=%d", n) >= 0);
iovec[0] = IOVEC_MAKE_STRING(p);
- assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL));
+ assert_ret(journal_file_append_entry(f->file, &ts, NULL, iovec, 1, seqnum, NULL, NULL, NULL));
free(p);
}
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
index 9f4494c6c0..940625a084 100644
--- a/src/journal/test-journal-stream.c
+++ b/src/journal/test-journal-stream.c
@@ -103,12 +103,12 @@ static void run_test(void) {
iovec[1] = IOVEC_MAKE(q, strlen(q));
if (i % 10 == 0)
- assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(three->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
else {
if (i % 3 == 0)
- assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(two->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
- assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(one->file, &ts, NULL, iovec, 2, NULL, NULL, NULL, NULL) == 0);
}
free(p);
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
index 0c58d05ced..7a90079fc8 100644
--- a/src/journal/test-journal-verify.c
+++ b/src/journal/test-journal-verify.c
@@ -94,7 +94,7 @@ static int run_test(int argc, char *argv[]) {
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(df->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
free(test);
}
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
index 889673cae7..cd295b2a7e 100644
--- a/src/journal/test-journal.c
+++ b/src/journal/test-journal.c
@@ -45,13 +45,13 @@ static void test_non_empty_one(void) {
assert_se(sd_id128_randomize(&fake_boot_id) == 0);
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test2);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
iovec = IOVEC_MAKE_STRING(test);
- assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, &fake_boot_id, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);
@@ -199,7 +199,7 @@ static bool check_compressed(uint64_t compress_threshold, uint64_t data_size) {
dual_timestamp_get(&ts);
iovec = IOVEC_MAKE(data, data_size);
- assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL) == 0);
+ assert_se(journal_file_append_entry(f->file, &ts, NULL, &iovec, 1, NULL, NULL, NULL, NULL) == 0);
#if HAVE_GCRYPT
journal_file_append_tag(f->file);