summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ilya Maximets <i.maximets@ovn.org> 2020-10-24 02:25:48 +0200
committer: Ilya Maximets <i.maximets@ovn.org> 2020-11-03 13:01:33 +0100
commit: f38f98a2c0dd7fcaf20fbe11d1e67a9b2afc0b2a (patch)
tree: 9bfaaaeceb94e847edc8e005649654306e5de5de
parent: 7e38188160294df43dbbbc0cf6cfd42d02881fcf (diff)
download: openvswitch-f38f98a2c0dd7fcaf20fbe11d1e67a9b2afc0b2a.tar.gz
ovsdb-server: Reclaim heap memory after compaction.
Compaction happens at most once in 10 minutes. That is a big time interval for a heavily loaded ovsdb-server in cluster mode. In 10 minutes raft logs could grow up to tens of thousands of entries with tens of gigabytes in total size. While compaction cleans up raft log entries, the memory in many cases is not returned to the system, but kept in the heap of the running ovsdb-server process, and it could stay in this condition for a really long time. In the end one performance spike could lead to a fast growth of the raft log and this memory will never (for a really long time) be released to the system even if the database is empty.

A simple example of how to reproduce with the OVN sandbox:

1. make sandbox SANDBOXFLAGS='--nbdb-model=clustered --sbdb-model=clustered'

2. Run the following script that creates 1 port group, adds 4000 acls and removes all of that in the end:

    # cat ../memory-test.sh
    pg_name=my_port_group
    export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach --log-file -vsocket_util:off)
    ovn-nbctl pg-add $pg_name
    for i in $(seq 1 4000); do
      echo "Iteration: $i"
      ovn-nbctl --log acl-add $pg_name from-lport $i udp drop
    done
    ovn-nbctl acl-del $pg_name
    ovn-nbctl pg-del $pg_name
    ovs-appctl -t $(pwd)/sandbox/nb1 memory/show
    ovn-appctl -t ovn-nbctl exit
    ---

3. Stop one of the Northbound DB servers:

    ovs-appctl -t $(pwd)/sandbox/nb1 exit

   Make sure that ovsdb-server didn't compact the database before it was stopped. Now we have a db file on disk that contains 4000 fairly big transactions inside.

4. Try to start the same ovsdb-server with this file:

    # cd sandbox && ovsdb-server <...> nb1.db

   At this point ovsdb-server reads all the transactions from the db file and performs all of them as fast as it can one by one. When it finishes this, the raft log contains 4000 entries and ovsdb-server consumes (on my system) ~13GB of memory while the database is empty. And libc will likely never return this memory back to the system, or, at least, will hold it for a really long time.
This patch adds a new command 'ovsdb-server/memory-trim-on-compaction'. It's disabled by default, but once enabled, ovsdb-server will call 'malloc_trim(0)' after every successful compaction to try to return unused heap memory back to the system. This is glibc-specific, so we need to detect function availability at build time. Disabled by default since it adds from 1% to 30% (depending on the current state) to the snapshot creation time and, also, the next memory allocations will likely require requests to the kernel and that might be slower. Could be enabled by default later if considered broadly beneficial.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1888829
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
-rw-r--r-- NEWS 3
-rw-r--r-- configure.ac 1
-rw-r--r-- ovsdb/ovsdb-server.1.in 4
-rw-r--r-- ovsdb/ovsdb-server.c 41
-rw-r--r-- ovsdb/ovsdb.c 12
-rw-r--r-- ovsdb/ovsdb.h 3
6 files changed, 60 insertions, 4 deletions
diff --git a/NEWS b/NEWS
index 8bb5bdc3f..2860a8e9c 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,9 @@ Post-v2.14.0
- OVSDB:
* New unixctl command 'ovsdb-server/get-db-storage-status' to show the
status of the storage that's backing a database.
+ * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'.
+ If turned on, ovsdb-server will try to reclaim all the unused memory
+ after every DB compaction back to OS. Disabled by default.
- DPDK:
* Removed support for vhost-user dequeue zero-copy.
- The environment variable OVS_UNBOUND_CONF, if set, is now used
diff --git a/configure.ac b/configure.ac
index 8d37af9db..126a1d9d1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -100,6 +100,7 @@ OVS_CHECK_IF_DL
OVS_CHECK_STRTOK_R
OVS_CHECK_LINUX_AF_XDP
AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]])
+AC_CHECK_DECLS([malloc_trim], [], [], [[#include <malloc.h>]])
AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
[], [], [[#include <sys/stat.h>]])
AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include <net/if.h>]])
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 6667553df..07a36cc7d 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -206,6 +206,10 @@ but not before 100 commits have been added or 10 minutes have elapsed
since the last compaction. It will also be compacted automatically
after 24 hours since the last compaction if 100 commits were added
regardless of its size.
+.IP "\fBovsdb\-server/memory-trim-on-compaction\fR \fIon\fR|\fIoff\fR"
+If this option is \fIon\fR, ovsdb-server will try to reclaim all unused
+heap memory back to the system after each successful database compaction
+to reduce the memory consumption of the process. \fIoff\fR by default.
.
.IP "\fBovsdb\-server/reconnect\fR"
Makes \fBovsdb\-server\fR drop all of the JSON\-RPC
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 73a155b3f..0e60e2b87 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -76,8 +76,12 @@ static char *ssl_protocols;
static char *ssl_ciphers;
static bool bootstrap_ca_cert;
+/* Try to reclaim heap memory back to system after DB compaction. */
+static bool trim_memory = false;
+
static unixctl_cb_func ovsdb_server_exit;
static unixctl_cb_func ovsdb_server_compact;
+static unixctl_cb_func ovsdb_server_memory_trim_on_compaction;
static unixctl_cb_func ovsdb_server_reconnect;
static unixctl_cb_func ovsdb_server_perf_counters_clear;
static unixctl_cb_func ovsdb_server_perf_counters_show;
@@ -243,7 +247,7 @@ main_loop(struct server_config *config,
xasprintf("removing database %s because storage "
"disconnected permanently", node->name));
} else if (ovsdb_storage_should_snapshot(db->db->storage)) {
- log_and_free_error(ovsdb_snapshot(db->db));
+ log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
}
}
if (run_process) {
@@ -410,6 +414,9 @@ main(int argc, char *argv[])
unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting);
unixctl_command_register("ovsdb-server/compact", "", 0, 1,
ovsdb_server_compact, &all_dbs);
+ unixctl_command_register("ovsdb-server/memory-trim-on-compaction",
+ "on|off", 1, 1,
+ ovsdb_server_memory_trim_on_compaction, NULL);
unixctl_command_register("ovsdb-server/reconnect", "", 0, 0,
ovsdb_server_reconnect, jsonrpc);
@@ -1492,7 +1499,8 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
VLOG_INFO("compacting %s database by user request",
node->name);
- struct ovsdb_error *error = ovsdb_snapshot(db->db);
+ struct ovsdb_error *error = ovsdb_snapshot(db->db,
+ trim_memory);
if (error) {
char *s = ovsdb_error_to_string(error);
ds_put_format(&reply, "%s\n", s);
@@ -1515,6 +1523,35 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
ds_destroy(&reply);
}
+/* "ovsdb-server/memory-trim-on-compaction": controls whether ovsdb-server
+ * tries to reclaim heap memory back to system using malloc_trim() after
+ * compaction. */
+static void
+ovsdb_server_memory_trim_on_compaction(struct unixctl_conn *conn,
+ int argc OVS_UNUSED,
+ const char *argv[],
+ void *arg OVS_UNUSED)
+{
+ const char *command = argv[1];
+
+#if !HAVE_DECL_MALLOC_TRIM
+ unixctl_command_reply_error(conn, "memory trimming is not supported");
+ return;
+#endif
+
+ if (!strcmp(command, "on")) {
+ trim_memory = true;
+ } else if (!strcmp(command, "off")) {
+ trim_memory = false;
+ } else {
+ unixctl_command_reply_error(conn, "invalid argument");
+ return;
+ }
+ VLOG_INFO("memory trimming after compaction %s.",
+ trim_memory ? "enabled" : "disabled");
+ unixctl_command_reply(conn, NULL);
+}
+
/* "ovsdb-server/reconnect": makes ovsdb-server drop all of its JSON-RPC
* connections and reconnect. */
static void
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 2da117cb3..9042658fa 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -17,6 +17,10 @@
#include "ovsdb.h"
+#if HAVE_DECL_MALLOC_TRIM
+#include <malloc.h>
+#endif
+
#include "column.h"
#include "file.h"
#include "monitor.h"
@@ -515,7 +519,7 @@ ovsdb_get_table(const struct ovsdb *db, const char *name)
}
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-ovsdb_snapshot(struct ovsdb *db)
+ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED)
{
if (!db->storage) {
return NULL;
@@ -527,6 +531,12 @@ ovsdb_snapshot(struct ovsdb *db)
schema, data);
json_destroy(schema);
json_destroy(data);
+
+#if HAVE_DECL_MALLOC_TRIM
+ if (!error && trim_memory) {
+ malloc_trim(0);
+ }
+#endif
return error;
}
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 5c30a83d9..72e127c84 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -112,7 +112,8 @@ struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *,
long long int elapsed_msec,
long long int *timeout_msec);
-struct ovsdb_error *ovsdb_snapshot(struct ovsdb *) OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error *ovsdb_snapshot(struct ovsdb *, bool trim_memory)
+ OVS_WARN_UNUSED_RESULT;
void ovsdb_replace(struct ovsdb *dst, struct ovsdb *src);