diff options
author | Ilya Maximets <i.maximets@ovn.org> | 2021-11-22 16:23:10 +0100 |
---|---|---|
committer | Ilya Maximets <i.maximets@ovn.org> | 2021-11-30 13:43:20 +0100 |
commit | 149169836d16146653472e54a54f6e755169cc20 (patch) | |
tree | f9f29f366602dd0edc60b090290ffcb14aa38675 /ofproto/ofproto.c | |
parent | a05883b897a1ad6321c0f445ee4065da73b3f215 (diff) | |
download | openvswitch-149169836d16146653472e54a54f6e755169cc20.tar.gz |
ofproto: Fix resource usage explosion due to removal of large number of flows.
While removing flows, removal itself is deferred, so classifier changes
performed already from the RCU thread. This way every deferred removal
triggers classifier change and reallocation of a pvector. Freeing of
old version of a pvector is postponed. Since all this is happening
from an RCU thread, all these copies of the same pvector will be freed
only after the next grace period.
Below is the example output of the 'valgrind --tool=massif' from an OVN
deployment, where copies of that pvector took 5 GB of memory while
processing a bundled flow removal:
-------------------------------------------------------------------
n time(i) total(B) useful-heap(B) extra-heap(B)
-------------------------------------------------------------------
89 176,257,987,954 5,329,763,160 5,318,171,607 11,591,553
99.78% (5,318,171,607B) (heap allocation functions) malloc/new/new[]
->98.45% (5,247,008,392B) xmalloc__ (util.c:137)
|->98.17% (5,232,137,408B) pvector_impl_dup (pvector.c:48)
||->98.16% (5,231,472,896B) pvector_remove (pvector.c:159)
|||->98.16% (5,231,472,800B) destroy_subtable (classifier.c:1558)
||||->98.16% (5,231,472,800B) classifier_remove (classifier.c:792)
|||| ->98.16% (5,231,472,800B) classifier_remove_assert (classifier.c:832)
|||| ->98.16% (5,231,472,800B) remove_rule_rcu__ (ofproto.c:2978)
|||| ->98.16% (5,231,472,800B) remove_rule_rcu (ofproto.c:2990)
|||| ->98.16% (5,231,472,800B) ovsrcu_call_postponed (ovs-rcu.c:346)
|||| ->98.16% (5,231,472,800B) ovsrcu_postpone_thread (ovs-rcu.c:362)
|||| ->98.16% (5,231,472,800B) ovsthread_wrapper
|||| ->98.16% (5,231,472,800B) start_thread
|||| ->98.16% (5,231,472,800B) clone
Collecting all the flows to be removed and postponing removal for
all of them together to avoid the problem. This way all removals
will trigger only a single pvector re-allocation greatly reducing
the CPU and memory usage.
Reported-by: Vladislav Odintsov <odivlad@gmail.com>
Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389538.html
Tested-by: Vladislav Odintsov <odivlad@gmail.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Diffstat (limited to 'ofproto/ofproto.c')
-rw-r--r-- | ofproto/ofproto.c | 31 |
1 files changed, 29 insertions, 2 deletions
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 139d6d394..56aeac720 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); static void ofproto_rule_remove__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); +static void remove_rules_postponed(struct rule_collection *) + OVS_REQUIRES(ofproto_mutex); /* The source of an OpenFlow request. * @@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type, hindex_init(&ofproto->cookies); hmap_init(&ofproto->learned_cookies); ovs_list_init(&ofproto->expirable); + ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove); + rule_collection_init(ofproto->to_remove); ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); ofproto->min_mtu = INT_MAX; cmap_init(&ofproto->groups); @@ -1631,6 +1635,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del) } ofproto_group_delete_all__(ofproto); meter_delete_all(ofproto); + remove_rules_postponed(ofproto->to_remove); /* XXX: Concurrent handler threads may insert new learned flows based on * learn actions of the now deleted flows right after we release * 'ofproto_mutex'. */ @@ -1682,6 +1687,11 @@ ofproto_destroy__(struct ofproto *ofproto) ovs_assert(hmap_is_empty(&ofproto->learned_cookies)); hmap_destroy(&ofproto->learned_cookies); + ovs_mutex_lock(&ofproto_mutex); + rule_collection_destroy(ofproto->to_remove); + free(ofproto->to_remove); + ovs_mutex_unlock(&ofproto_mutex); + ofproto->ofproto_class->dealloc(ofproto); } @@ -1878,6 +1888,9 @@ ofproto_run(struct ofproto *p) connmgr_run(p->connmgr, handle_openflow); + ovs_mutex_lock(&ofproto_mutex); + remove_rules_postponed(p->to_remove); + ovs_mutex_unlock(&ofproto_mutex); return error; } @@ -4437,6 +4450,20 @@ rule_criteria_destroy(struct rule_criteria *criteria) criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */ } +/* Adds rules to the 'to_remove' collection, so they can be destroyed + * later all together. Destroys 'rules'. */ +static void +rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule *rule; + + RULE_COLLECTION_FOR_EACH (rule, rules) { + rule_collection_add(ofproto->to_remove, rule); + } + rule_collection_destroy(rules); +} + /* Schedules postponed removal of rules, destroys 'rules'. */ static void remove_rules_postponed(struct rule_collection *rules) @@ -5833,7 +5860,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, } } learned_cookies_flush(ofproto, &dead_cookies); - remove_rules_postponed(old_rules); + rules_mark_for_removal(ofproto, old_rules); } return error; @@ -5941,7 +5968,7 @@ delete_flows_finish__(struct ofproto *ofproto, learned_cookies_dec(ofproto, rule_get_actions(rule), &dead_cookies); } - remove_rules_postponed(rules); + rules_mark_for_removal(ofproto, rules); learned_cookies_flush(ofproto, &dead_cookies); } |