author     Dan Smith <dansmith@redhat.com>      2022-08-11 10:18:25 -0700
committer  melanie witt <melwittt@gmail.com>    2022-09-22 23:04:14 +0000
commit     19346082058d51c78bb157ca5e1304d15691dd9a (patch)
tree       c91b14cef97e3974a3bea98a5bf7c79647cb86dc
parent     77273f067d96a4ec401c3b36f2922d63c4ad7103 (diff)
download   nova-19346082058d51c78bb157ca5e1304d15691dd9a.tar.gz
Avoid n-cond startup abort for keystone failures
Conductor creates a placement client for the potential case where it needs
to make a call for certain operations. A transient network or keystone
failure will currently cause it to abort startup, which means it is not
available for other unrelated activities, such as DB proxying for compute.

This makes conductor test the placement client on startup, but only abort
startup on errors that are highly likely to be permanent configuration
errors, and only warn about things like being unable to contact
keystone/placement during initialization. If a non-fatal error is
encountered at startup, later operations needing the placement client will
retry initialization.

Conflicts:
    nova/tests/unit/conductor/test_conductor.py

NOTE(melwitt): The conflict is because change
Id5b04cf2f6ca24af8e366d23f15cf0e5cac8e1cc (Use unittest.mock instead of
third party mock) is not in Yoga.

Closes-Bug: #1846820
Change-Id: Idb7fcbce0c9562e7b9bd3e80f2a6d4b9bc286830
(cherry picked from commit 232684b44022f1bc4d72b07045900780de456e63)
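For orientation before reading the diff, the pattern being applied is roughly
the following self-contained sketch. The class and helper names here
(ConductorLikeManager, _make_client) are placeholders for illustration, not
Nova's real API; the diff below is the authoritative change.

# Simplified sketch of "probe the placement client at startup, but only
# abort for likely-permanent configuration errors"; placeholder names,
# not Nova's actual code.
import logging

from keystoneauth1 import exceptions as ks_exc

LOG = logging.getLogger(__name__)

# Likely-transient failures: warn and keep starting up.
NON_FATAL_EXCEPTIONS = (
    ks_exc.EndpointNotFound,
    ks_exc.DiscoveryFailure,
    ks_exc.RequestTimeout,
    ks_exc.GatewayTimeout,
    ks_exc.ConnectFailure,
)


def _make_client():
    """Placeholder for building the real placement client."""
    raise ks_exc.ConnectFailure('placement is unreachable')


class ConductorLikeManager:
    def __init__(self):
        try:
            # Probe the client once so obvious misconfiguration fails fast.
            self.placement_client
        except NON_FATAL_EXCEPTIONS as e:
            # Transient problem: warn, but let the service start so it can
            # still serve unrelated work (e.g. DB proxying for compute).
            LOG.warning('Unable to initialize placement client (%s); '
                        'continuing with startup.', e)
        # Anything else (ks_exc.MissingAuthPlugin, ks_exc.Unauthorized,
        # unexpected errors) propagates and aborts startup.

    @property
    def placement_client(self):
        # Looked up on every access, so a failure during startup does not
        # prevent later operations from retrying initialization.
        return _make_client()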
-rw-r--r--  nova/conductor/manager.py                         34
-rw-r--r--  nova/tests/unit/conductor/test_conductor.py       35
-rw-r--r--  nova/tests/unit/scheduler/client/test_report.py   19
3 files changed, 87 insertions(+), 1 deletion(-)
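The new test_error_then_success test at the bottom of the diff relies on the
existing singleton helper caching the client only when construction succeeds,
so a failed attempt can simply be retried on the next call. A rough, generic
sketch of that behaviour follows (an assumption for illustration; it is not
Nova's actual report_client_singleton implementation):

# Generic "cache only on success" singleton; names are placeholders.
_CLIENT = None


def client_singleton(factory):
    """Return the cached client, building it on the first successful call."""
    global _CLIENT
    if _CLIENT is None:
        # If factory() raises, _CLIENT stays None, so a later call after a
        # transient failure will try again instead of caching the error.
        _CLIENT = factory()
    return _CLIENT

This mirrors what test_error_then_success asserts: after an error the global
stays unset, and a subsequent successful call sets and then reuses the same
client.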
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py
index aaec1c99b7..53067bbef7 100644
--- a/nova/conductor/manager.py
+++ b/nova/conductor/manager.py
@@ -21,6 +21,7 @@ import eventlet
import functools
import sys
+from keystoneauth1 import exceptions as ks_exc
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_limit import exception as limit_exceptions
@@ -243,11 +244,42 @@ class ComputeTaskManager:
self.network_api = neutron.API()
self.servicegroup_api = servicegroup.API()
self.query_client = query.SchedulerQueryClient()
- self.report_client = report.report_client_singleton()
self.notifier = rpc.get_notifier('compute')
# Help us to record host in EventReporter
self.host = CONF.host
+ try:
+ # Test our placement client during initialization
+ self.report_client
+ except (ks_exc.EndpointNotFound,
+ ks_exc.DiscoveryFailure,
+ ks_exc.RequestTimeout,
+ ks_exc.GatewayTimeout,
+ ks_exc.ConnectFailure) as e:
+ # Non-fatal, likely transient (although not definitely);
+ # continue startup but log the warning so that when things
+ # fail later, it will be clear why we can not do certain
+ # things.
+ LOG.warning('Unable to initialize placement client (%s); '
+ 'Continuing with startup, but some operations '
+ 'will not be possible.', e)
+ except (ks_exc.MissingAuthPlugin,
+ ks_exc.Unauthorized) as e:
+ # This is almost definitely fatal mis-configuration. The
+ # Unauthorized error might be transient, but it is
+ # probably reasonable to consider it fatal.
+ LOG.error('Fatal error initializing placement client; '
+ 'config is incorrect or incomplete: %s', e)
+ raise
+ except Exception as e:
+ # Unknown/unexpected errors here are fatal
+ LOG.error('Fatal error initializing placement client: %s', e)
+ raise
+
+ @property
+ def report_client(self):
+ return report.report_client_singleton()
+
def reset(self):
LOG.info('Reloading compute RPC API')
compute_rpcapi.LAST_VERSION = None
diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py
index 15aa960aad..8c954db9a7 100644
--- a/nova/tests/unit/conductor/test_conductor.py
+++ b/nova/tests/unit/conductor/test_conductor.py
@@ -17,6 +17,8 @@
import copy
+import ddt
+from keystoneauth1 import exceptions as ks_exc
import mock
from oslo_db import exception as db_exc
from oslo_limit import exception as limit_exceptions
@@ -52,6 +54,7 @@ from nova.objects import block_device as block_device_obj
from nova.objects import fields
from nova.objects import request_spec
from nova.scheduler.client import query
+from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import test
from nova.tests import fixtures
@@ -4869,3 +4872,35 @@ class ConductorTaskAPITestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
logtext)
self.assertIn('host3\' because it is not up', logtext)
self.assertIn('image1 failed 1 times', logtext)
+
+
+@ddt.ddt
+class TestConductorTaskManager(test.NoDBTestCase):
+ def test_placement_client_startup(self):
+ self.assertIsNone(report.PLACEMENTCLIENT)
+ conductor_manager.ComputeTaskManager()
+ self.assertIsNotNone(report.PLACEMENTCLIENT)
+
+ @ddt.data(ks_exc.MissingAuthPlugin,
+ ks_exc.Unauthorized,
+ test.TestingException)
+ def test_placement_client_startup_fatals(self, exc):
+ self.assertRaises(exc,
+ self._test_placement_client_startup_exception, exc)
+
+ @ddt.data(ks_exc.EndpointNotFound,
+ ks_exc.DiscoveryFailure,
+ ks_exc.RequestTimeout,
+ ks_exc.GatewayTimeout,
+ ks_exc.ConnectFailure)
+ def test_placement_client_startup_non_fatal(self, exc):
+ self._test_placement_client_startup_exception(exc)
+
+ @mock.patch.object(report, 'LOG')
+ def _test_placement_client_startup_exception(self, exc, mock_log):
+ with mock.patch.object(report.SchedulerReportClient, '_create_client',
+ side_effect=exc):
+ try:
+ conductor_manager.ComputeTaskManager()
+ finally:
+ mock_log.error.assert_called_once()
diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py
index 485f187d9e..9b2f5c3a0a 100644
--- a/nova/tests/unit/scheduler/client/test_report.py
+++ b/nova/tests/unit/scheduler/client/test_report.py
@@ -185,6 +185,25 @@ class TestSingleton(test.NoDBTestCase):
self.assertRaises(exc, report.report_client_singleton)
mock_log.error.assert_called_once()
+ def test_error_then_success(self):
+ # Simulate an error
+ self._test_error(ks_exc.ConnectFailure)
+
+ # Ensure we did not set the global client
+ self.assertIsNone(report.PLACEMENTCLIENT)
+
+ # Call again, with no error
+ client = report.report_client_singleton()
+
+ # Make sure we got a client and that it was set as the global
+ # one
+ self.assertIsNotNone(client)
+ self.assertEqual(client, report.PLACEMENTCLIENT)
+
+ # Make sure we keep getting the same one
+ client2 = report.report_client_singleton()
+ self.assertEqual(client, client2)
+
class TestConstructor(test.NoDBTestCase):
def setUp(self):