summaryrefslogtreecommitdiff
path: root/ofproto/ofproto.c
diff options
context:
space:
mode:
author Ilya Maximets <i.maximets@ovn.org> 2021-11-22 16:23:10 +0100
committer Ilya Maximets <i.maximets@ovn.org> 2021-11-30 13:43:20 +0100
commit 149169836d16146653472e54a54f6e755169cc20 (patch)
tree f9f29f366602dd0edc60b090290ffcb14aa38675 /ofproto/ofproto.c
parent a05883b897a1ad6321c0f445ee4065da73b3f215 (diff)
download openvswitch-149169836d16146653472e54a54f6e755169cc20.tar.gz
ofproto: Fix resource usage explosion due to removal of large number of flows.
While removing flows, the removal itself is deferred, so the classifier changes are already performed from the RCU thread. This way every deferred removal triggers a classifier change and a reallocation of a pvector. Freeing of the old version of the pvector is postponed. Since all this is happening from an RCU thread, all these copies of the same pvector will be freed only after the next grace period.

Below is example output of 'valgrind --tool=massif' from an OVN deployment, where copies of that pvector took 5 GB of memory while processing a bundled flow removal:

-------------------------------------------------------------------
n time(i) total(B) useful-heap(B) extra-heap(B)
-------------------------------------------------------------------
89 176,257,987,954 5,329,763,160 5,318,171,607 11,591,553
99.78% (5,318,171,607B) (heap allocation functions) malloc/new/new[]
->98.45% (5,247,008,392B) xmalloc__ (util.c:137)
|->98.17% (5,232,137,408B) pvector_impl_dup (pvector.c:48)
||->98.16% (5,231,472,896B) pvector_remove (pvector.c:159)
|||->98.16% (5,231,472,800B) destroy_subtable (classifier.c:1558)
||||->98.16% (5,231,472,800B) classifier_remove (classifier.c:792)
|||| ->98.16% (5,231,472,800B) classifier_remove_assert (classifier.c:832)
|||| ->98.16% (5,231,472,800B) remove_rule_rcu__ (ofproto.c:2978)
|||| ->98.16% (5,231,472,800B) remove_rule_rcu (ofproto.c:2990)
|||| ->98.16% (5,231,472,800B) ovsrcu_call_postponed (ovs-rcu.c:346)
|||| ->98.16% (5,231,472,800B) ovsrcu_postpone_thread (ovs-rcu.c:362)
|||| ->98.16% (5,231,472,800B) ovsthread_wrapper
|||| ->98.16% (5,231,472,800B) start_thread
|||| ->98.16% (5,231,472,800B) clone

Collect all the flows to be removed and postpone the removal of all of them together to avoid the problem. This way all removals will trigger only a single pvector re-allocation, greatly reducing the CPU and memory usage.
Reported-by: Vladislav Odintsov <odivlad@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389538.html
Tested-by: Vladislav Odintsov <odivlad@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'ofproto/ofproto.c')
-rw-r--r-- ofproto/ofproto.c 31
1 files changed, 29 insertions, 2 deletions
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 139d6d394..56aeac720 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
static void ofproto_rule_remove__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
+static void remove_rules_postponed(struct rule_collection *)
+ OVS_REQUIRES(ofproto_mutex);
/* The source of an OpenFlow request.
*
@@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
hindex_init(&ofproto->cookies);
hmap_init(&ofproto->learned_cookies);
ovs_list_init(&ofproto->expirable);
+ ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove);
+ rule_collection_init(ofproto->to_remove);
ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
ofproto->min_mtu = INT_MAX;
cmap_init(&ofproto->groups);
@@ -1631,6 +1635,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del)
}
ofproto_group_delete_all__(ofproto);
meter_delete_all(ofproto);
+ remove_rules_postponed(ofproto->to_remove);
/* XXX: Concurrent handler threads may insert new learned flows based on
* learn actions of the now deleted flows right after we release
* 'ofproto_mutex'. */
@@ -1682,6 +1687,11 @@ ofproto_destroy__(struct ofproto *ofproto)
ovs_assert(hmap_is_empty(&ofproto->learned_cookies));
hmap_destroy(&ofproto->learned_cookies);
+ ovs_mutex_lock(&ofproto_mutex);
+ rule_collection_destroy(ofproto->to_remove);
+ free(ofproto->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
+
ofproto->ofproto_class->dealloc(ofproto);
}
@@ -1878,6 +1888,9 @@ ofproto_run(struct ofproto *p)
connmgr_run(p->connmgr, handle_openflow);
+ ovs_mutex_lock(&ofproto_mutex);
+ remove_rules_postponed(p->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
return error;
}
@@ -4437,6 +4450,20 @@ rule_criteria_destroy(struct rule_criteria *criteria)
criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */
}
+/* Appends every rule in 'rules' to 'ofproto->to_remove' so that they can all
+ * be removed together later, then destroys 'rules'. */
+static void
+rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules)
+ OVS_REQUIRES(ofproto_mutex)
+{
+ struct rule *rule;
+
+ RULE_COLLECTION_FOR_EACH (rule, rules) {
+ rule_collection_add(ofproto->to_remove, rule);
+ }
+ rule_collection_destroy(rules);
+}
+
/* Schedules postponed removal of rules, destroys 'rules'. */
static void
remove_rules_postponed(struct rule_collection *rules)
@@ -5833,7 +5860,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
}
}
learned_cookies_flush(ofproto, &dead_cookies);
- remove_rules_postponed(old_rules);
+ rules_mark_for_removal(ofproto, old_rules);
}
return error;
@@ -5941,7 +5968,7 @@ delete_flows_finish__(struct ofproto *ofproto,
learned_cookies_dec(ofproto, rule_get_actions(rule),
&dead_cookies);
}
- remove_rules_postponed(rules);
+ rules_mark_for_removal(ofproto, rules);
learned_cookies_flush(ofproto, &dead_cookies);
}