summaryrefslogtreecommitdiff
path: root/ovsdb
diff options
context:
space:
mode:
authorIlya Maximets <i.maximets@ovn.org>2021-09-28 13:17:21 +0200
committerIlya Maximets <i.maximets@ovn.org>2021-11-05 22:33:03 +0100
commit317b1bfd7dd315e241c158e6d4095002ff391ee3 (patch)
tree6930434f6777c0f49ab3e9616bfa49c4c1787109 /ovsdb
parent1bdda7b6d53c92e877b457157676aff326414c53 (diff)
downloadopenvswitch-317b1bfd7dd315e241c158e6d4095002ff391ee3.tar.gz
ovsdb: Don't let transaction history grow larger than the database.
If user frequently changes a lot of rows in a database, transaction history could grow way larger than the database itself. This wastes a lot of memory and also makes monitor_cond_since slower than usual monotor_cond if the transaction id is old enough, because re-construction of the changes from a history is slower than just creation of initial database snapshot. This is also the case if user deleted a lot of data, so transaction history still holds all of it while the database itself doesn't. In case of current lb-per-service model in ovn-kubernetes, each load-balancer is added to every logical switch/router. Such a transaction touches more than a half of a OVN_Northbound database. And each of these transactions is added to the transaction history. Since transaction history depth is 100, in worst case scenario, it will hold 100 copies of a database increasing memory consumption dramatically. In tests with 3000 LBs and 120 LSs, memory goes up to 3 GB, while holding at 30 MB if transaction history disabled in the code. Fixing that by keeping count of the number of ovsdb_atom's in the database and not allowing the total number of atoms in transaction history to grow larger than this value. Counting atoms is fairly cheap because we don't need to iterate over them, so it doesn't have significant performance impact. It would be ideal to measure the size of individual atoms, but that will hit the performance. Counting cells instead of atoms is not sufficient, because OVN users are adding hundreds or thousands of atoms to a single cell, so they are largely different in size. Signed-off-by: Ilya Maximets <i.maximets@ovn.org> Acked-by: Han Zhou <hzhou@ovn.org> Acked-by: Dumitru Ceara <dceara@redhat.com>
Diffstat (limited to 'ovsdb')
-rw-r--r--ovsdb/ovsdb.c5
-rw-r--r--ovsdb/ovsdb.h3
-rw-r--r--ovsdb/transaction.c56
3 files changed, 62 insertions, 2 deletions
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 126d16a2f..e6d866182 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
ovs_list_init(&db->triggers);
db->run_triggers_now = db->run_triggers = false;
+ db->n_atoms = 0;
+
db->is_relay = false;
ovs_list_init(&db->txn_forward_new);
hmap_init(&db->txn_forward_sent);
@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
}
simap_increase(usage, "cells", cells);
+ simap_increase(usage, "atoms", db->n_atoms);
+ simap_increase(usage, "txn-history", db->n_txn_history);
+ simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
if (db->storage) {
ovsdb_storage_get_memory_usage(db->storage, usage);
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 4a7bd0f0e..ec2d235ec 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -90,8 +90,11 @@ struct ovsdb {
/* History trasanctions for incremental monitor transfer. */
bool need_txn_history; /* Need to maintain history of transactions. */
unsigned int n_txn_history; /* Current number of history transactions. */
+ unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */
+ size_t n_atoms; /* Total number of ovsdb atoms in the database. */
+
/* Relay mode. */
bool is_relay; /* True, if database is in relay mode. */
/* List that holds transactions waiting to be forwarded to the server. */
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index 11bbd57eb..88e052800 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -41,6 +41,9 @@ struct ovsdb_txn {
struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
struct ds comment;
struct uuid txnid; /* For clustered mode only. It is the eid. */
+ size_t n_atoms; /* Number of atoms in all transaction rows. */
+ ssize_t n_atoms_diff; /* Difference between number of added and
+ * removed atoms. */
};
/* A table modified by a transaction. */
@@ -940,6 +943,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
}
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
+{
+ struct ovsdb_table *table = txn_row->table;
+ ssize_t n_atoms_old = 0, n_atoms_new = 0;
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, &table->schema->columns) {
+ const struct ovsdb_column *column = node->data;
+ const struct ovsdb_type *type = &column->type;
+ unsigned int idx = column->index;
+
+ if (txn_row->old) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ }
+ }
+ if (txn_row->new) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ }
+ }
+ }
+
+ txn->n_atoms += n_atoms_old + n_atoms_new;
+ txn->n_atoms_diff += n_atoms_new - n_atoms_old;
+ return NULL;
+}
+
+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
{
struct ovsdb_table *table = txn_row->table;
@@ -1007,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
return error;
}
+ /* Count atoms. */
+ error = for_each_txn_row(txn, count_atoms);
+ if (error) {
+ return OVSDB_WRAP_BUG("can't happen", error);
+ }
+
/* Update _version for rows that changed. */
error = for_each_txn_row(txn, update_version);
if (error) {
@@ -1022,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
ovs_list_init(&txn_cloned->txn_tables);
txn_cloned->txnid = txn->txnid;
+ txn_cloned->n_atoms = txn->n_atoms;
+ txn_cloned->n_atoms_diff = txn->n_atoms_diff;
struct ovsdb_txn_table *t;
LIST_FOR_EACH (t, node, &txn->txn_tables) {
@@ -1080,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
node->txn = ovsdb_txn_clone(txn);
ovs_list_push_back(&txn->db->txn_history, &node->node);
txn->db->n_txn_history++;
+ txn->db->n_txn_history_atoms += txn->n_atoms;
}
}
@@ -1090,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
if (!ovsdb_txn_is_empty(txn)) {
txn->db->run_triggers_now = txn->db->run_triggers = true;
+ txn->db->n_atoms += txn->n_atoms_diff;
ovsdb_monitors_commit(txn->db, txn);
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
@@ -1548,12 +1592,18 @@ ovsdb_txn_history_run(struct ovsdb *db)
if (!db->need_txn_history) {
return;
}
- /* Remove old histories to limit the size of the history */
- while (db->n_txn_history > 100) {
+ /* Remove old histories to limit the size of the history. Removing until
+ * the number of ovsdb atoms in history becomes less than the number of
+ * atoms in the database, because it will be faster to just get a database
+ * snapshot than re-constructing changes from the history that big. */
+ while (db->n_txn_history &&
+ (db->n_txn_history > 100 ||
+ db->n_txn_history_atoms > db->n_atoms)) {
struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
ovs_list_pop_front(&db->txn_history),
struct ovsdb_txn_history_node, node);
+ db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
ovsdb_txn_destroy_cloned(txn_h_node->txn);
free(txn_h_node);
db->n_txn_history--;
@@ -1565,6 +1615,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
{
db->need_txn_history = need_txn_history;
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
ovs_list_init(&db->txn_history);
}
@@ -1583,4 +1634,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
free(txn_h_node);
}
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
}