diff options
author | Lucas Alvares Gomes <lucasagomes@gmail.com> | 2022-10-11 11:08:23 +0100 |
---|---|---|
committer | Lucas Alvares Gomes <lucasagomes@gmail.com> | 2022-10-14 14:37:53 +0100 |
commit | 2adb471f7575ddcd6af146f84f23bb6f916f79b9 (patch) | |
tree | 0df2e8c17ef44003453d6770369f4171845d7600 | |
parent | edd92ce6b9b36da5a7bc4e2569429646adf696d8 (diff) | |
download | neutron-2adb471f7575ddcd6af146f84f23bb6f916f79b9.tar.gz |
[OVN] Avoid deadlock when cleaning hash ring nodes
This patch avoids the clash between the hash ring cleaning operation and
the API workers by ensuring that the cleaning happens before the nodes for
that host are added to the ring and before the connections to the OVSDBs
are established (meaning no events, and therefore no SELECTs on the hash
ring table for that hostname).
This patch does this by re-using the same hash ring lock that guards the
start of the probing thread. Now, the first worker that acquires the lock
is responsible for cleaning the hash ring for its own host as well as
starting the probing thread. Subsequent workers only need to register
themselves to the hash ring.
Conflicts:
neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py
Change-Id: Iba73f7944592a003232eb397ba1d4da3dcba5c3a
Closes-Bug: #1990174
Signed-off-by: Lucas Alvares Gomes <lucasagomes@gmail.com>
(cherry picked from commit b7b8f7c571440577a40aacf9d8d93abc3a5a48b3)
-rw-r--r-- | neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py index 9ea84198e7..baf4ef2fd2 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py @@ -118,10 +118,6 @@ class OVNMechanismDriver(api.MechanismDriver): self.node_uuid = None self.hash_ring_group = ovn_const.HASH_RING_ML2_GROUP self.sg_enabled = ovn_acl.is_sg_enabled() - # NOTE(lucasagomes): _clean_hash_ring() must be called before - # self.subscribe() to avoid processes racing when adding or - # deleting nodes from the Hash Ring during service initialization - self._clean_hash_ring() self._post_fork_event = threading.Event() if cfg.CONF.SECURITYGROUP.firewall_driver: LOG.warning('Firewall driver configuration is ignored') @@ -307,8 +303,19 @@ class OVNMechanismDriver(api.MechanismDriver): service.RpcWorker) @lockutils.synchronized('hash_ring_probe_lock', external=True) - def _start_hash_ring_probe(self): + def _setup_hash_ring(self): + """Setup the hash ring. + + The first worker to acquire the lock is responsible for cleaning + the hash ring from previous runs as well as start the probing + thread for this host. Subsequently workers just need to register + themselves to the hash ring. 
+ """ + admin_context = n_context.get_admin_context() if not self._hash_ring_probe_event.is_set(): + self._clean_hash_ring() + self.node_uuid = ovn_hash_ring_db.add_node(admin_context, + self.hash_ring_group) self._hash_ring_thread = maintenance.MaintenanceThread() self._hash_ring_thread.add_periodics( maintenance.HashRingHealthCheckPeriodics( @@ -316,6 +323,9 @@ class OVNMechanismDriver(api.MechanismDriver): self._hash_ring_thread.start() LOG.info("Hash Ring probing thread has started") self._hash_ring_probe_event.set() + else: + self.node_uuid = ovn_hash_ring_db.add_node(admin_context, + self.hash_ring_group) def post_fork_initialize(self, resource, event, trigger, payload=None): # Initialize API/Maintenance workers with OVN IDL connections @@ -328,10 +338,7 @@ class OVNMechanismDriver(api.MechanismDriver): self._ovn_client_inst = None if worker_class == neutron.wsgi.WorkerService: - admin_context = n_context.get_admin_context() - self.node_uuid = ovn_hash_ring_db.add_node(admin_context, - self.hash_ring_group) - self._start_hash_ring_probe() + self._setup_hash_ring() n_agent.AgentCache(self) # Initialize singleton agent cache self._nb_ovn, self._sb_ovn = impl_idl_ovn.get_ovn_idls(self, trigger) |