summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Terry Wilson <twilson@redhat.com> 2023-01-26 08:37:24 -0600
committer yatinkarel <ykarel@redhat.com> 2023-01-27 17:10:18 +0530
commit de8f74aec889e4d6398a228dc14d58c2b475cd86 (patch)
tree 992473c92dc59258214d81c8d9bd8c12f04f51af
parent a4514641e9baa4b3f9856acb5847d6e639389822 (diff)
download neutron-de8f74aec889e4d6398a228dc14d58c2b475cd86.tar.gz
Never raise an exception in notify()
notify() is called from python-ovs code which is not built to recover from an exception in this user-overridden code. If there is an exception (e.g. the DB server is down when we process the hash ring), this exception can cause an unrecoverable error in processing OVSDB messages, rendering the neutron worker useless.

Change-Id: I5f703d82175d71a222c76df37a82b5ccad890d14
(cherry picked from commit 67e616b2380d6549308a15077b2043721dbea5d0)
(cherry picked from commit 848787785eb1140ee7d0eac72f3967b39345e625)

Conflicts: neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py

(cherry picked from commit 3566cc065eb7e811822a472bd40e37ecf7668971)
-rw-r--r-- neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py 64
1 files changed, 34 insertions, 30 deletions
diff --git a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py
index 9962be8973..50323bc4d8 100644
--- a/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py
+++ b/neutron/plugins/ml2/drivers/ovn/mech_driver/ovsdb/ovsdb_monitor.py
@@ -567,38 +567,42 @@ class OvnIdlDistributedLock(BaseOvnIdl):
self._last_touch = None
def notify(self, event, row, updates=None):
- self.notify_handler.notify(event, row, updates, global_=True)
try:
- target_node = self._hash_ring.get_node(str(row.uuid))
- except exceptions.HashRingIsEmpty as e:
- LOG.error('HashRing is empty, error: %s', e)
- return
- if target_node != self._node_uuid:
- return
-
- # If the worker hasn't been health checked by the maintenance
- # thread (see bug #1834498), indicate that it's alive here
- time_now = timeutils.utcnow()
- touch_timeout = time_now - datetime.timedelta(
- seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
- if not self._last_touch or touch_timeout >= self._last_touch:
- # NOTE(lucasagomes): Guard the db operation with an exception
- # handler. If heartbeating fails for whatever reason, log
- # the error and continue with processing the event
+ self.notify_handler.notify(event, row, updates, global_=True)
try:
- ctx = neutron_context.get_admin_context()
- ovn_hash_ring_db.touch_node(ctx, self._node_uuid)
- self._last_touch = time_now
- except Exception:
- LOG.exception('Hash Ring node %s failed to heartbeat',
- self._node_uuid)
-
- LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) '
- 'handling event "%(event)s" for row %(row)s '
- '(table: %(table)s)',
- {'node': self._node_uuid, 'hostname': CONF.host,
- 'event': event, 'row': row.uuid, 'table': row._table.name})
- self.notify_handler.notify(event, row, updates)
+ target_node = self._hash_ring.get_node(str(row.uuid))
+ except exceptions.HashRingIsEmpty as e:
+ LOG.error('HashRing is empty, error: %s', e)
+ return
+ if target_node != self._node_uuid:
+ return
+
+ # If the worker hasn't been health checked by the maintenance
+ # thread (see bug #1834498), indicate that it's alive here
+ time_now = timeutils.utcnow()
+ touch_timeout = time_now - datetime.timedelta(
+ seconds=ovn_const.HASH_RING_TOUCH_INTERVAL)
+ if not self._last_touch or touch_timeout >= self._last_touch:
+ # NOTE(lucasagomes): Guard the db operation with an exception
+ # handler. If heartbeating fails for whatever reason, log
+ # the error and continue with processing the event
+ try:
+ ctx = neutron_context.get_admin_context()
+ ovn_hash_ring_db.touch_node(ctx, self._node_uuid)
+ self._last_touch = time_now
+ except Exception:
+ LOG.exception('Hash Ring node %s failed to heartbeat',
+ self._node_uuid)
+
+ LOG.debug('Hash Ring: Node %(node)s (host: %(hostname)s) '
+ 'handling event "%(event)s" for row %(row)s '
+ '(table: %(table)s)',
+ {'node': self._node_uuid, 'hostname': CONF.host,
+ 'event': event, 'row': row.uuid,
+ 'table': row._table.name})
+ self.notify_handler.notify(event, row, updates)
+ except Exception as e:
+ LOG.exception(e)
@abc.abstractmethod
def post_connect(self):