diff options
author | Lennart Poettering <lennart@poettering.net> | 2019-01-26 13:55:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-26 13:55:55 +0100 |
commit | 67216ef8f2cb63482dc7ba184b8b7460cf4916f1 (patch) | |
tree | 51daa2f3600cbf37d1423e8deff2b390d898840f | |
parent | b3f259d0566bdac1af2987dd316e312c0557edf9 (diff) | |
parent | 91714a7f427a6c9c5c3be8b3819fee45050028f3 (diff) | |
download | systemd-67216ef8f2cb63482dc7ba184b8b7460cf4916f1.tar.gz |
Merge pull request #11530 from keszybz/journal-cache-trimming
Journal cache trimming
-rw-r--r-- | src/basic/prioq.c | 7 | ||||
-rw-r--r-- | src/basic/prioq.h | 8 | ||||
-rw-r--r-- | src/basic/procfs-util.c | 9 | ||||
-rw-r--r-- | src/basic/procfs-util.h | 5 | ||||
-rw-r--r-- | src/cgtop/cgtop.c | 2 | ||||
-rw-r--r-- | src/core/cgroup.c | 2 | ||||
-rw-r--r-- | src/journal/journald-context.c | 63 | ||||
-rw-r--r-- | src/journal/journald-server.h | 2 | ||||
-rw-r--r-- | src/test/test-prioq.c | 16 | ||||
-rw-r--r-- | src/test/test-procfs-util.c | 2 |
10 files changed, 99 insertions, 17 deletions
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
index cfd08d5d23..76b27fa0a8 100644
--- a/src/basic/prioq.c
+++ b/src/basic/prioq.c
@@ -259,15 +259,14 @@ int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) {
         return 1;
 }
 
-void *prioq_peek(Prioq *q) {
-
+void *prioq_peek_by_index(Prioq *q, unsigned idx) {
         if (!q)
                 return NULL;
 
-        if (q->n_items <= 0)
+        if (idx >= q->n_items)
                 return NULL;
 
-        return q->items[0].data;
+        return q->items[idx].data;
 }
 
 void *prioq_pop(Prioq *q) {
diff --git a/src/basic/prioq.h b/src/basic/prioq.h
index bba5c7caa4..1fb57bfa4c 100644
--- a/src/basic/prioq.h
+++ b/src/basic/prioq.h
@@ -19,8 +19,14 @@ int prioq_put(Prioq *q, void *data, unsigned *idx);
 int prioq_remove(Prioq *q, void *data, unsigned *idx);
 int prioq_reshuffle(Prioq *q, void *data, unsigned *idx);
 
-void *prioq_peek(Prioq *q) _pure_;
+void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_;
+static inline void *prioq_peek(Prioq *q) {
+        return prioq_peek_by_index(q, 0);
+}
 void *prioq_pop(Prioq *q);
 
+#define PRIOQ_FOREACH_ITEM(q, p) \
+        for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++)
+
 unsigned prioq_size(Prioq *q) _pure_;
 bool prioq_isempty(Prioq *q) _pure_;
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
index a159e344b3..7aaf95bfce 100644
--- a/src/basic/procfs-util.c
+++ b/src/basic/procfs-util.c
@@ -201,13 +201,11 @@ int procfs_cpu_get_usage(nsec_t *ret) {
         return 0;
 }
 
-int procfs_memory_get_current(uint64_t *ret) {
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) {
         uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
         _cleanup_fclose_ FILE *f = NULL;
         int r;
 
-        assert(ret);
-
         f = fopen("/proc/meminfo", "re");
         if (!f)
                 return -errno;
@@ -262,6 +260,9 @@ int procfs_memory_get_current(uint64_t *ret) {
         if (mem_free > mem_total)
                 return -EINVAL;
 
-        *ret = (mem_total - mem_free) * 1024U;
+        if (ret_total)
+                *ret_total = mem_total * 1024U;
+        if (ret_used)
+                *ret_used = (mem_total - mem_free) * 1024U;
         return 0;
 }
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
index f697ed92bc..5a44e9eff7 100644
--- a/src/basic/procfs-util.h
+++ b/src/basic/procfs-util.h
@@ -11,4 +11,7 @@ int procfs_tasks_get_current(uint64_t *ret);
 
 int procfs_cpu_get_usage(nsec_t *ret);
 
-int procfs_memory_get_current(uint64_t *ret);
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used);
+static inline int procfs_memory_get_used(uint64_t *ret) {
+        return procfs_memory_get(NULL, ret);
+}
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 11cc5fa2e9..b3bda30cec 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -291,7 +291,7 @@ static int process(
         } else if (streq(controller, "memory")) {
 
                 if (is_root_cgroup(path)) {
-                        r = procfs_memory_get_current(&g->memory);
+                        r = procfs_memory_get_used(&g->memory);
                         if (r < 0)
                                 return r;
                 } else {
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index ed2f331b33..18d470b6d6 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -2780,7 +2780,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
 
         /* The root cgroup doesn't expose this information, let's get it from /proc instead */
         if (unit_has_host_root_cgroup(u))
-                return procfs_memory_get_current(ret);
+                return procfs_memory_get_used(ret);
 
         if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
                 return -ENODATA;
diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c
index 80bbc34e37..7c51f2f633 100644
--- a/src/journal/journald-context.c
+++ b/src/journal/journald-context.c
@@ -16,6 +16,7 @@
 #include "parse-util.h"
 #include "path-util.h"
 #include "process-util.h"
+#include "procfs-util.h"
 #include "string-util.h"
 #include "syslog-util.h"
 #include "unaligned.h"
@@ -60,7 +61,37 @@
 /* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
  * the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
  * clients itself is limited.) */
-#define CACHE_MAX (16*1024)
+#define CACHE_MAX_FALLBACK 128U
+#define CACHE_MAX_MAX (16*1024U)
+#define CACHE_MAX_MIN 64U
+
+static size_t cache_max(void) {
+        static size_t cached = -1;
+
+        if (cached == (size_t) -1) {
+                uint64_t mem_total;
+                int r;
+
+                r = procfs_memory_get(&mem_total, NULL);
+                if (r < 0) {
+                        log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m");
+                        cached = CACHE_MAX_FALLBACK;
+                } else {
+                        /* Cache entries are usually a few kB, but the process cmdline is controlled by the
+                         * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to
+                         * 1/8th of memory may be used by the cache.
+                         *
+                         * In the common case, this formula gives 64 cache entries for each GB of RAM.
+                         */
+                        long l = sysconf(_SC_ARG_MAX);
+                        assert(l > 0);
+
+                        cached = CLAMP(mem_total / 8 / (uint64_t) l, CACHE_MAX_MIN, CACHE_MAX_MAX);
+                }
+        }
+
+        return cached;
+}
 
 static int client_context_compare(const void *a, const void *b) {
         const ClientContext *x = a, *y = b;
@@ -550,15 +581,39 @@ refresh:
 }
 
 static void client_context_try_shrink_to(Server *s, size_t limit) {
+        ClientContext *c;
+        usec_t t;
+
         assert(s);
 
+        /* Flush any cache entries for PIDs that have already moved on. Don't do this
+         * too often, since it's a slow process. */
+        t = now(CLOCK_MONOTONIC);
+        if (s->last_cache_pid_flush + MAX_USEC < t) {
+                unsigned n = prioq_size(s->client_contexts_lru), idx = 0;
+
+                /* We do a number of iterations based on the initial size of the prioq. When we remove an
+                 * item, a new item is moved into its places, and items to the right might be reshuffled.
+                 */
+                for (unsigned i = 0; i < n; i++) {
+                        c = prioq_peek_by_index(s->client_contexts_lru, idx);
+
+                        assert(c->n_ref == 0);
+
+                        if (!pid_is_unwaited(c->pid))
+                                client_context_free(s, c);
+                        else
+                                idx ++;
+                }
+
+                s->last_cache_pid_flush = t;
+        }
+
         /* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
          * breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
          * cache entries may very well grow beyond the limit, if all entries stored remain pinned. */
 
         while (hashmap_size(s->client_contexts) > limit) {
-                ClientContext *c;
-
                 c = prioq_pop(s->client_contexts_lru);
                 if (!c)
                         break; /* All remaining entries are pinned, give up */
@@ -627,7 +682,7 @@ static int client_context_get_internal(
                 return 0;
         }
 
-        client_context_try_shrink_to(s, CACHE_MAX-1);
+        client_context_try_shrink_to(s, cache_max()-1);
 
         r = client_context_new(s, pid, &c);
         if (r < 0)
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index 6d4847b0cd..3f6b42ddd5 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -161,6 +161,8 @@ struct Server {
         Hashmap *client_contexts;
         Prioq *client_contexts_lru;
 
+        usec_t last_cache_pid_flush;
+
         ClientContext *my_context; /* the context of journald itself */
         ClientContext *pid1_context; /* the context of PID 1 */
 };
diff --git a/src/test/test-prioq.c b/src/test/test-prioq.c
index bc5fdd15b2..53c9e090a7 100644
--- a/src/test/test-prioq.c
+++ b/src/test/test-prioq.c
@@ -69,6 +69,11 @@ static void test_struct(void) {
         assert_se(q = prioq_new((compare_func_t) test_compare));
         assert_se(s = set_new(&test_hash_ops));
 
+        assert_se(prioq_peek(q) == NULL);
+        assert_se(prioq_peek_by_index(q, 0) == NULL);
+        assert_se(prioq_peek_by_index(q, 1) == NULL);
+        assert_se(prioq_peek_by_index(q, (unsigned) -1) == NULL);
+
         for (i = 0; i < SET_SIZE; i++) {
                 assert_se(t = new0(struct test, 1));
                 t->value = (unsigned) rand();
@@ -79,6 +84,17 @@ static void test_struct(void) {
                 assert_se(set_consume(s, t) >= 0);
         }
 
+        for (i = 0; i < SET_SIZE; i++)
+                assert_se(prioq_peek_by_index(q, i));
+        assert_se(prioq_peek_by_index(q, SET_SIZE) == NULL);
+
+        unsigned count = 0;
+        PRIOQ_FOREACH_ITEM(q, t) {
+                assert_se(t);
+                count++;
+        }
+        assert_se(count == SET_SIZE);
+
         while ((t = set_steal_first(s))) {
                 assert_se(prioq_remove(q, t, &t->idx) == 1);
                 assert_se(prioq_remove(q, t, &t->idx) == 0);
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
index 08af380cc7..1d0612985b 100644
--- a/src/test/test-procfs-util.c
+++ b/src/test/test-procfs-util.c
@@ -18,7 +18,7 @@ int main(int argc, char *argv[]) {
         assert_se(procfs_cpu_get_usage(&nsec) >= 0);
         log_info("Current system CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
 
-        assert_se(procfs_memory_get_current(&v) >= 0);
+        assert_se(procfs_memory_get_used(&v) >= 0);
         log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
 
         assert_se(procfs_tasks_get_current(&v) >= 0);