summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLennart Poettering <lennart@poettering.net>2019-01-26 13:55:55 +0100
committerGitHub <noreply@github.com>2019-01-26 13:55:55 +0100
commit67216ef8f2cb63482dc7ba184b8b7460cf4916f1 (patch)
tree51daa2f3600cbf37d1423e8deff2b390d898840f
parentb3f259d0566bdac1af2987dd316e312c0557edf9 (diff)
parent91714a7f427a6c9c5c3be8b3819fee45050028f3 (diff)
downloadsystemd-67216ef8f2cb63482dc7ba184b8b7460cf4916f1.tar.gz
Merge pull request #11530 from keszybz/journal-cache-trimming
Journal cache trimming
-rw-r--r--src/basic/prioq.c7
-rw-r--r--src/basic/prioq.h8
-rw-r--r--src/basic/procfs-util.c9
-rw-r--r--src/basic/procfs-util.h5
-rw-r--r--src/cgtop/cgtop.c2
-rw-r--r--src/core/cgroup.c2
-rw-r--r--src/journal/journald-context.c63
-rw-r--r--src/journal/journald-server.h2
-rw-r--r--src/test/test-prioq.c16
-rw-r--r--src/test/test-procfs-util.c2
10 files changed, 99 insertions, 17 deletions
diff --git a/src/basic/prioq.c b/src/basic/prioq.c
index cfd08d5d23..76b27fa0a8 100644
--- a/src/basic/prioq.c
+++ b/src/basic/prioq.c
@@ -259,15 +259,14 @@ int prioq_reshuffle(Prioq *q, void *data, unsigned *idx) {
return 1;
}
-void *prioq_peek(Prioq *q) {
-
+void *prioq_peek_by_index(Prioq *q, unsigned idx) {
if (!q)
return NULL;
- if (q->n_items <= 0)
+ if (idx >= q->n_items)
return NULL;
- return q->items[0].data;
+ return q->items[idx].data;
}
void *prioq_pop(Prioq *q) {
diff --git a/src/basic/prioq.h b/src/basic/prioq.h
index bba5c7caa4..1fb57bfa4c 100644
--- a/src/basic/prioq.h
+++ b/src/basic/prioq.h
@@ -19,8 +19,14 @@ int prioq_put(Prioq *q, void *data, unsigned *idx);
int prioq_remove(Prioq *q, void *data, unsigned *idx);
int prioq_reshuffle(Prioq *q, void *data, unsigned *idx);
-void *prioq_peek(Prioq *q) _pure_;
+void *prioq_peek_by_index(Prioq *q, unsigned idx) _pure_;
+static inline void *prioq_peek(Prioq *q) {
+ return prioq_peek_by_index(q, 0);
+}
void *prioq_pop(Prioq *q);
+#define PRIOQ_FOREACH_ITEM(q, p) \
+ for (unsigned _i = 0; (p = prioq_peek_by_index(q, _i)); _i++)
+
unsigned prioq_size(Prioq *q) _pure_;
bool prioq_isempty(Prioq *q) _pure_;
diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c
index a159e344b3..7aaf95bfce 100644
--- a/src/basic/procfs-util.c
+++ b/src/basic/procfs-util.c
@@ -201,13 +201,11 @@ int procfs_cpu_get_usage(nsec_t *ret) {
return 0;
}
-int procfs_memory_get_current(uint64_t *ret) {
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used) {
uint64_t mem_total = UINT64_MAX, mem_free = UINT64_MAX;
_cleanup_fclose_ FILE *f = NULL;
int r;
- assert(ret);
-
f = fopen("/proc/meminfo", "re");
if (!f)
return -errno;
@@ -262,6 +260,9 @@ int procfs_memory_get_current(uint64_t *ret) {
if (mem_free > mem_total)
return -EINVAL;
- *ret = (mem_total - mem_free) * 1024U;
+ if (ret_total)
+ *ret_total = mem_total * 1024U;
+ if (ret_used)
+ *ret_used = (mem_total - mem_free) * 1024U;
return 0;
}
diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h
index f697ed92bc..5a44e9eff7 100644
--- a/src/basic/procfs-util.h
+++ b/src/basic/procfs-util.h
@@ -11,4 +11,7 @@ int procfs_tasks_get_current(uint64_t *ret);
int procfs_cpu_get_usage(nsec_t *ret);
-int procfs_memory_get_current(uint64_t *ret);
+int procfs_memory_get(uint64_t *ret_total, uint64_t *ret_used);
+static inline int procfs_memory_get_used(uint64_t *ret) {
+ return procfs_memory_get(NULL, ret);
+}
diff --git a/src/cgtop/cgtop.c b/src/cgtop/cgtop.c
index 11cc5fa2e9..b3bda30cec 100644
--- a/src/cgtop/cgtop.c
+++ b/src/cgtop/cgtop.c
@@ -291,7 +291,7 @@ static int process(
} else if (streq(controller, "memory")) {
if (is_root_cgroup(path)) {
- r = procfs_memory_get_current(&g->memory);
+ r = procfs_memory_get_used(&g->memory);
if (r < 0)
return r;
} else {
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index ed2f331b33..18d470b6d6 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -2780,7 +2780,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
/* The root cgroup doesn't expose this information, let's get it from /proc instead */
if (unit_has_host_root_cgroup(u))
- return procfs_memory_get_current(ret);
+ return procfs_memory_get_used(ret);
if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0)
return -ENODATA;
diff --git a/src/journal/journald-context.c b/src/journal/journald-context.c
index 80bbc34e37..7c51f2f633 100644
--- a/src/journal/journald-context.c
+++ b/src/journal/journald-context.c
@@ -16,6 +16,7 @@
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
+#include "procfs-util.h"
#include "string-util.h"
#include "syslog-util.h"
#include "unaligned.h"
@@ -60,7 +61,37 @@
/* Keep at most 16K entries in the cache. (Note though that this limit may be violated if enough streams pin entries in
* the cache, in which case we *do* permit this limit to be breached. That's safe however, as the number of stream
* clients itself is limited.) */
-#define CACHE_MAX (16*1024)
+#define CACHE_MAX_FALLBACK 128U
+#define CACHE_MAX_MAX (16*1024U)
+#define CACHE_MAX_MIN 64U
+
+static size_t cache_max(void) {
+ static size_t cached = -1;
+
+ if (cached == (size_t) -1) {
+ uint64_t mem_total;
+ int r;
+
+ r = procfs_memory_get(&mem_total, NULL);
+ if (r < 0) {
+ log_warning_errno(r, "Cannot query /proc/meminfo for MemTotal: %m");
+ cached = CACHE_MAX_FALLBACK;
+ } else {
+ /* Cache entries are usually a few kB, but the process cmdline is controlled by the
+ * user and can be up to _SC_ARG_MAX, usually 2MB. Let's say that approximately up to
+ * 1/8th of memory may be used by the cache.
+ *
+ * In the common case, this formula gives 64 cache entries for each GB of RAM.
+ */
+ long l = sysconf(_SC_ARG_MAX);
+ assert(l > 0);
+
+ cached = CLAMP(mem_total / 8 / (uint64_t) l, CACHE_MAX_MIN, CACHE_MAX_MAX);
+ }
+ }
+
+ return cached;
+}
static int client_context_compare(const void *a, const void *b) {
const ClientContext *x = a, *y = b;
@@ -550,15 +581,39 @@ refresh:
}
static void client_context_try_shrink_to(Server *s, size_t limit) {
+ ClientContext *c;
+ usec_t t;
+
assert(s);
+ /* Flush any cache entries for PIDs that have already moved on. Don't do this
+ * too often, since it's a slow process. */
+ t = now(CLOCK_MONOTONIC);
+ if (s->last_cache_pid_flush + MAX_USEC < t) {
+ unsigned n = prioq_size(s->client_contexts_lru), idx = 0;
+
+ /* We do a number of iterations based on the initial size of the prioq. When we remove an
+ * item, a new item is moved into its places, and items to the right might be reshuffled.
+ */
+ for (unsigned i = 0; i < n; i++) {
+ c = prioq_peek_by_index(s->client_contexts_lru, idx);
+
+ assert(c->n_ref == 0);
+
+ if (!pid_is_unwaited(c->pid))
+ client_context_free(s, c);
+ else
+ idx ++;
+ }
+
+ s->last_cache_pid_flush = t;
+ }
+
/* Bring the number of cache entries below the indicated limit, so that we can create a new entry without
* breaching the limit. Note that we only flush out entries that aren't pinned here. This means the number of
* cache entries may very well grow beyond the limit, if all entries stored remain pinned. */
while (hashmap_size(s->client_contexts) > limit) {
- ClientContext *c;
-
c = prioq_pop(s->client_contexts_lru);
if (!c)
break; /* All remaining entries are pinned, give up */
@@ -627,7 +682,7 @@ static int client_context_get_internal(
return 0;
}
- client_context_try_shrink_to(s, CACHE_MAX-1);
+ client_context_try_shrink_to(s, cache_max()-1);
r = client_context_new(s, pid, &c);
if (r < 0)
diff --git a/src/journal/journald-server.h b/src/journal/journald-server.h
index 6d4847b0cd..3f6b42ddd5 100644
--- a/src/journal/journald-server.h
+++ b/src/journal/journald-server.h
@@ -161,6 +161,8 @@ struct Server {
Hashmap *client_contexts;
Prioq *client_contexts_lru;
+ usec_t last_cache_pid_flush;
+
ClientContext *my_context; /* the context of journald itself */
ClientContext *pid1_context; /* the context of PID 1 */
};
diff --git a/src/test/test-prioq.c b/src/test/test-prioq.c
index bc5fdd15b2..53c9e090a7 100644
--- a/src/test/test-prioq.c
+++ b/src/test/test-prioq.c
@@ -69,6 +69,11 @@ static void test_struct(void) {
assert_se(q = prioq_new((compare_func_t) test_compare));
assert_se(s = set_new(&test_hash_ops));
+ assert_se(prioq_peek(q) == NULL);
+ assert_se(prioq_peek_by_index(q, 0) == NULL);
+ assert_se(prioq_peek_by_index(q, 1) == NULL);
+ assert_se(prioq_peek_by_index(q, (unsigned) -1) == NULL);
+
for (i = 0; i < SET_SIZE; i++) {
assert_se(t = new0(struct test, 1));
t->value = (unsigned) rand();
@@ -79,6 +84,17 @@ static void test_struct(void) {
assert_se(set_consume(s, t) >= 0);
}
+ for (i = 0; i < SET_SIZE; i++)
+ assert_se(prioq_peek_by_index(q, i));
+ assert_se(prioq_peek_by_index(q, SET_SIZE) == NULL);
+
+ unsigned count = 0;
+ PRIOQ_FOREACH_ITEM(q, t) {
+ assert_se(t);
+ count++;
+ }
+ assert_se(count == SET_SIZE);
+
while ((t = set_steal_first(s))) {
assert_se(prioq_remove(q, t, &t->idx) == 1);
assert_se(prioq_remove(q, t, &t->idx) == 0);
diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c
index 08af380cc7..1d0612985b 100644
--- a/src/test/test-procfs-util.c
+++ b/src/test/test-procfs-util.c
@@ -18,7 +18,7 @@ int main(int argc, char *argv[]) {
assert_se(procfs_cpu_get_usage(&nsec) >= 0);
log_info("Current system CPU time: %s", format_timespan(buf, sizeof(buf), nsec/NSEC_PER_USEC, 1));
- assert_se(procfs_memory_get_current(&v) >= 0);
+ assert_se(procfs_memory_get_used(&v) >= 0);
log_info("Current memory usage: %s", format_bytes(buf, sizeof(buf), v));
assert_se(procfs_tasks_get_current(&v) >= 0);