From 040782fb6896a47100fd8a322ec2975cfa1f6b0c Mon Sep 17 00:00:00 2001 From: Lucas Alvares Gomes Date: Tue, 4 Oct 2022 10:27:04 +0100 Subject: Split Hash Ring probing from the maintenance task This patch splits the Hash Ring probing out of the maintenance task into its own thread. The idea is to speed up the start of probing by doing it right after adding a node to the Hash Ring. By doing that, we avoid the problem of delaying probing in case the connection with OVSDB takes longer than expected to connect and the hash ring nodes are considered dead as they weren't probed in time. The patch re-uses the same classes as before to start this new thread (instead of reusing the maintenance task thread). It adds a layer of synchronization with a lock to make sure that only one new Hash Ring probing thread is started. (cherry picked from commit 240f2c6aebb5a958e3cdea9b9188e7f605238494) Closes-Bug: #1991655 Change-Id: Ic04493f20eb9aecda563942c51f343dc4202523a Signed-off-by: Lucas Alvares Gomes --- .../ml2/drivers/ovn/mech_driver/mech_driver.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py index c79c7ded4b..0d3f2ee2ae 100644 --- a/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py +++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/mech_driver.py @@ -16,6 +16,7 @@ import atexit import copy import datetime import functools +import multiprocessing import operator import signal import threading @@ -33,6 +34,7 @@ from neutron_lib import exceptions as n_exc from neutron_lib.plugins import directory from neutron_lib.plugins.ml2 import api from neutron_lib.utils import helpers +from oslo_concurrency import lockutils from oslo_concurrency import processutils from oslo_config import cfg from oslo_db import exception as os_db_exc @@ -112,6 +114,8 @@ class OVNMechanismDriver(api.MechanismDriver): self._plugin_property 
= None self._ovn_client_inst = None self._maintenance_thread = None + self._hash_ring_thread = None + self._hash_ring_probe_event = multiprocessing.Event() self.node_uuid = None self.hash_ring_group = ovn_const.HASH_RING_ML2_GROUP self.sg_enabled = ovn_acl.is_sg_enabled() @@ -303,6 +307,17 @@ class OVNMechanismDriver(api.MechanismDriver): worker.MaintenanceWorker, service.RpcWorker) + @lockutils.synchronized('hash_ring_probe_lock', external=True) + def _start_hash_ring_probe(self): + if not self._hash_ring_probe_event.is_set(): + self._hash_ring_thread = maintenance.MaintenanceThread() + self._hash_ring_thread.add_periodics( + maintenance.HashRingHealthCheckPeriodics( + self.hash_ring_group)) + self._hash_ring_thread.start() + LOG.info("Hash Ring probing thread has started") + self._hash_ring_probe_event.set() + def post_fork_initialize(self, resource, event, trigger, payload=None): # Initialize API/Maintenance workers with OVN IDL connections worker_class = ovn_utils.get_method_class(trigger) @@ -317,6 +332,7 @@ class OVNMechanismDriver(api.MechanismDriver): admin_context = n_context.get_admin_context() self.node_uuid = ovn_hash_ring_db.add_node(admin_context, self.hash_ring_group) + self._start_hash_ring_probe() self._nb_ovn, self._sb_ovn = impl_idl_ovn.get_ovn_idls(self, trigger) @@ -363,10 +379,8 @@ class OVNMechanismDriver(api.MechanismDriver): self._maintenance_thread = maintenance.MaintenanceThread() self._maintenance_thread.add_periodics( maintenance.DBInconsistenciesPeriodics(self._ovn_client)) - self._maintenance_thread.add_periodics( - maintenance.HashRingHealthCheckPeriodics( - self.hash_ring_group)) self._maintenance_thread.start() + LOG.info("Maintenance task thread has started") def _wait_for_pg_drop_event(self): """Wait for event that occurs when neutron_pg_drop Port Group exists. -- cgit v1.2.1