diff options
author | Lennart Poettering <lennart@poettering.net> | 2014-12-30 20:57:53 +0100 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2015-01-05 01:40:51 +0100 |
commit | fa6ac76083b8ffc1309876459f54f9f0e2843731 (patch) | |
tree | 5169233d54825b07e63da5d11fbc3b4e248aa38b /src/journal/mmap-cache.c | |
parent | f93bf4363395018ef48d744c4624158623afd693 (diff) | |
download | systemd-fa6ac76083b8ffc1309876459f54f9f0e2843731.tar.gz |
journald: process SIGBUS for the memory maps we set up
Even though we use fallocate(), it appears that file systems like btrfs
will trigger SIGBUS in certain low-disk-space situations. We should
handle that: catch the signal, add the faulting page to a list of
invalidated pages, and replace the page with an empty memory area. After
each write, check if SIGBUS was triggered, and consider the write invalid if it was.
This should make journald a lot more robust with file systems where
fallocate() is not reliable, for example all CoW file systems
(btrfs...), where changing written data can fail with disk full errors.
https://bugzilla.redhat.com/show_bug.cgi?id=1045810
Diffstat (limited to 'src/journal/mmap-cache.c')
-rw-r--r-- | src/journal/mmap-cache.c | 126 |
1 files changed, 117 insertions, 9 deletions
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c index 4c940aaa24..ab21cdc288 100644 --- a/src/journal/mmap-cache.c +++ b/src/journal/mmap-cache.c @@ -29,6 +29,7 @@ #include "log.h" #include "util.h" #include "macro.h" +#include "sigbus.h" #include "mmap-cache.h" typedef struct Window Window; @@ -38,6 +39,7 @@ typedef struct FileDescriptor FileDescriptor; struct Window { MMapCache *cache; + bool invalidated; bool keep_always; bool in_unused; @@ -65,6 +67,7 @@ struct Context { struct FileDescriptor { MMapCache *cache; int fd; + bool sigbus; LIST_HEAD(Window, windows); }; @@ -134,6 +137,21 @@ static void window_unlink(Window *w) { } } +static void window_invalidate(Window *w) { + assert(w); + + if (w->invalidated) + return; + + /* Replace the window with anonymous pages. This is useful + * when we hit a SIGBUS and want to make sure the file cannot + * trigger any further SIGBUS, possibly overrunning the sigbus + * queue. */ + + assert_se(mmap(w->ptr, w->size, w->prot, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0) == w->ptr); + w->invalidated = true; +} + static void window_free(Window *w) { assert(w); @@ -383,6 +401,9 @@ static int try_context( return 0; } + if (c->window->fd->sigbus) + return -EIO; + c->window->keep_always |= keep_always; *ret = (uint8_t*) c->window->ptr + (offset - c->window->offset); @@ -414,6 +435,9 @@ static int find_mmap( assert(f->fd == fd); + if (f->sigbus) + return -EIO; + LIST_FOREACH(by_fd, w, f->windows) if (window_matches(w, fd, prot, offset, size)) break; @@ -572,27 +596,111 @@ int mmap_cache_get( return add_mmap(m, fd, prot, context, keep_always, offset, size, st, ret); } -void mmap_cache_close_fd(MMapCache *m, int fd) { +unsigned mmap_cache_get_hit(MMapCache *m) { + assert(m); + + return m->n_hit; +} + +unsigned mmap_cache_get_missed(MMapCache *m) { + assert(m); + + return m->n_missed; +} + +static void mmap_cache_process_sigbus(MMapCache *m) { + bool found = false; FileDescriptor *f; + Iterator i; + int r; assert(m); - 
assert(fd >= 0); - f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); - if (!f) + /* Iterate through all triggered pages and mark their files as + * invalidated */ + for (;;) { + bool ours; + void *addr; + + r = sigbus_pop(&addr); + if (_likely_(r == 0)) + break; + if (r < 0) { + log_error_errno(r, "SIGBUS handling failed: %m"); + abort(); + } + + ours = false; + HASHMAP_FOREACH(f, m->fds, i) { + Window *w; + + LIST_FOREACH(by_fd, w, f->windows) { + if ((uint8_t*) addr >= (uint8_t*) w->ptr && + (uint8_t*) addr < (uint8_t*) w->ptr + w->size) { + found = ours = f->sigbus = true; + break; + } + } + + if (ours) + break; + } + + /* Didn't find a matching window, give up */ + if (!ours) { + log_error("Unknown SIGBUS page, aborting."); + abort(); + } + } + + /* The list of triggered pages is now empty. Now, let's remap + * all windows of the triggered file to anonymous maps, so + * that no page of the file in question is triggered again, so + * that we can be sure not to hit the queue size limit. */ + if (_likely_(!found)) return; - fd_free(f); + HASHMAP_FOREACH(f, m->fds, i) { + Window *w; + + if (!f->sigbus) + continue; + + LIST_FOREACH(by_fd, w, f->windows) + window_invalidate(w); + } } -unsigned mmap_cache_get_hit(MMapCache *m) { +bool mmap_cache_got_sigbus(MMapCache *m, int fd) { + FileDescriptor *f; + assert(m); + assert(fd >= 0); - return m->n_hit; + mmap_cache_process_sigbus(m); + + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) + return false; + + return f->sigbus; } -unsigned mmap_cache_get_missed(MMapCache *m) { +void mmap_cache_close_fd(MMapCache *m, int fd) { + FileDescriptor *f; + assert(m); + assert(fd >= 0); - return m->n_missed; + /* Make sure that any queued SIGBUS are first dispatched, so + * that we don't end up with a SIGBUS entry we cannot relate + * to any existing memory map */ + + mmap_cache_process_sigbus(m); + + f = hashmap_get(m->fds, INT_TO_PTR(fd + 1)); + if (!f) + return; + + fd_free(f); } |