diff options
author | Dan Smith <dansmith@redhat.com> | 2022-08-11 10:18:25 -0700 |
---|---|---|
committer | melanie witt <melwittt@gmail.com> | 2022-09-22 23:04:14 +0000 |
commit | 19346082058d51c78bb157ca5e1304d15691dd9a (patch) | |
tree | c91b14cef97e3974a3bea98a5bf7c79647cb86dc | |
parent | 77273f067d96a4ec401c3b36f2922d63c4ad7103 (diff) | |
download | nova-19346082058d51c78bb157ca5e1304d15691dd9a.tar.gz |
Avoid n-cond startup abort for keystone failures
Conductor creates a placement client in case it needs to call
placement for certain operations. A transient network or keystone
failure while creating that client currently causes conductor to
abort startup, which makes it unavailable for other, unrelated
activities such as DB proxying for compute.
This change makes conductor test the placement client on startup,
but abort startup only on errors that are highly likely to be
permanent configuration errors. Failures such as being unable to
contact keystone/placement during initialization only produce a
warning. If a non-fatal error is encountered at startup, later
operations that need the placement client will retry its
initialization.
Conflicts:
nova/tests/unit/conductor/test_conductor.py
NOTE(melwitt): The conflict arises because change
Id5b04cf2f6ca24af8e366d23f15cf0e5cac8e1cc
(Use unittest.mock instead of third party mock) is not in Yoga.
Closes-Bug: #1846820
Change-Id: Idb7fcbce0c9562e7b9bd3e80f2a6d4b9bc286830
(cherry picked from commit 232684b44022f1bc4d72b07045900780de456e63)
-rw-r--r-- | nova/conductor/manager.py | 34 | ||||
-rw-r--r-- | nova/tests/unit/conductor/test_conductor.py | 35 | ||||
-rw-r--r-- | nova/tests/unit/scheduler/client/test_report.py | 19 |
3 files changed, 87 insertions, 1 deletion
diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index aaec1c99b7..53067bbef7 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -21,6 +21,7 @@ import eventlet import functools import sys +from keystoneauth1 import exceptions as ks_exc from oslo_config import cfg from oslo_db import exception as db_exc from oslo_limit import exception as limit_exceptions @@ -243,11 +244,42 @@ class ComputeTaskManager: self.network_api = neutron.API() self.servicegroup_api = servicegroup.API() self.query_client = query.SchedulerQueryClient() - self.report_client = report.report_client_singleton() self.notifier = rpc.get_notifier('compute') # Help us to record host in EventReporter self.host = CONF.host + try: + # Test our placement client during initialization + self.report_client + except (ks_exc.EndpointNotFound, + ks_exc.DiscoveryFailure, + ks_exc.RequestTimeout, + ks_exc.GatewayTimeout, + ks_exc.ConnectFailure) as e: + # Non-fatal, likely transient (although not definitely); + # continue startup but log the warning so that when things + # fail later, it will be clear why we can not do certain + # things. + LOG.warning('Unable to initialize placement client (%s); ' + 'Continuing with startup, but some operations ' + 'will not be possible.', e) + except (ks_exc.MissingAuthPlugin, + ks_exc.Unauthorized) as e: + # This is almost definitely fatal mis-configuration. The + # Unauthorized error might be transient, but it is + # probably reasonable to consider it fatal. 
+ LOG.error('Fatal error initializing placement client; ' + 'config is incorrect or incomplete: %s', e) + raise + except Exception as e: + # Unknown/unexpected errors here are fatal + LOG.error('Fatal error initializing placement client: %s', e) + raise + + @property + def report_client(self): + return report.report_client_singleton() + def reset(self): LOG.info('Reloading compute RPC API') compute_rpcapi.LAST_VERSION = None diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 15aa960aad..8c954db9a7 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -17,6 +17,8 @@ import copy +import ddt +from keystoneauth1 import exceptions as ks_exc import mock from oslo_db import exception as db_exc from oslo_limit import exception as limit_exceptions @@ -52,6 +54,7 @@ from nova.objects import block_device as block_device_obj from nova.objects import fields from nova.objects import request_spec from nova.scheduler.client import query +from nova.scheduler.client import report from nova.scheduler import utils as scheduler_utils from nova import test from nova.tests import fixtures @@ -4869,3 +4872,35 @@ class ConductorTaskAPITestCase(_BaseTaskTestCase, test_compute.BaseTestCase): logtext) self.assertIn('host3\' because it is not up', logtext) self.assertIn('image1 failed 1 times', logtext) + + +@ddt.ddt +class TestConductorTaskManager(test.NoDBTestCase): + def test_placement_client_startup(self): + self.assertIsNone(report.PLACEMENTCLIENT) + conductor_manager.ComputeTaskManager() + self.assertIsNotNone(report.PLACEMENTCLIENT) + + @ddt.data(ks_exc.MissingAuthPlugin, + ks_exc.Unauthorized, + test.TestingException) + def test_placement_client_startup_fatals(self, exc): + self.assertRaises(exc, + self._test_placement_client_startup_exception, exc) + + @ddt.data(ks_exc.EndpointNotFound, + ks_exc.DiscoveryFailure, + ks_exc.RequestTimeout, + ks_exc.GatewayTimeout, + 
ks_exc.ConnectFailure) + def test_placement_client_startup_non_fatal(self, exc): + self._test_placement_client_startup_exception(exc) + + @mock.patch.object(report, 'LOG') + def _test_placement_client_startup_exception(self, exc, mock_log): + with mock.patch.object(report.SchedulerReportClient, '_create_client', + side_effect=exc): + try: + conductor_manager.ComputeTaskManager() + finally: + mock_log.error.assert_called_once() diff --git a/nova/tests/unit/scheduler/client/test_report.py b/nova/tests/unit/scheduler/client/test_report.py index 485f187d9e..9b2f5c3a0a 100644 --- a/nova/tests/unit/scheduler/client/test_report.py +++ b/nova/tests/unit/scheduler/client/test_report.py @@ -185,6 +185,25 @@ class TestSingleton(test.NoDBTestCase): self.assertRaises(exc, report.report_client_singleton) mock_log.error.assert_called_once() + def test_error_then_success(self): + # Simulate an error + self._test_error(ks_exc.ConnectFailure) + + # Ensure we did not set the global client + self.assertIsNone(report.PLACEMENTCLIENT) + + # Call again, with no error + client = report.report_client_singleton() + + # Make sure we got a client and that it was set as the global + # one + self.assertIsNotNone(client) + self.assertEqual(client, report.PLACEMENTCLIENT) + + # Make sure we keep getting the same one + client2 = report.report_client_singleton() + self.assertEqual(client, client2) + class TestConstructor(test.NoDBTestCase): def setUp(self): |