From d37cca361a4d575311318cb870da40079eb1617c Mon Sep 17 00:00:00 2001 From: Dan Smith Date: Wed, 22 Mar 2023 08:20:58 -0700 Subject: Make scheduler lazy-load the placement client Like we did for conductor, this makes the scheduler lazy-load the placement client instead of only doing it during __init__. This avoids a startup crash if keystone or placement are not available, but retains startup failures for other problems and errors likely to be a result of misconfigurations. Closes-Bug: #2012530 Change-Id: I42ed876b84d80536e83d9ae01696b0a64299c9f7 --- nova/scheduler/manager.py | 35 +++++++++++++++++++++++++++++- nova/tests/unit/scheduler/test_manager.py | 36 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/nova/scheduler/manager.py b/nova/scheduler/manager.py index 11581c4f2d..620519d403 100644 --- a/nova/scheduler/manager.py +++ b/nova/scheduler/manager.py @@ -23,6 +23,7 @@ import collections import copy import random +from keystoneauth1 import exceptions as ks_exc from oslo_log import log as logging import oslo_messaging as messaging from oslo_serialization import jsonutils @@ -67,10 +68,42 @@ class SchedulerManager(manager.Manager): self.host_manager = host_manager.HostManager() self.servicegroup_api = servicegroup.API() self.notifier = rpc.get_notifier('scheduler') - self.placement_client = report.report_client_singleton() + self._placement_client = None + + try: + # Test our placement client during initialization + self.placement_client + except (ks_exc.EndpointNotFound, + ks_exc.DiscoveryFailure, + ks_exc.RequestTimeout, + ks_exc.GatewayTimeout, + ks_exc.ConnectFailure) as e: + # Non-fatal, likely transient (although not definitely); + # continue startup but log the warning so that when things + # fail later, it will be clear why we can not do certain + # things. 
+ LOG.warning('Unable to initialize placement client (%s); ' + 'Continuing with startup, but scheduling ' + 'will not be possible.', e) + except (ks_exc.MissingAuthPlugin, + ks_exc.Unauthorized) as e: + # This is almost definitely fatal mis-configuration. The + # Unauthorized error might be transient, but it is + # probably reasonable to consider it fatal. + LOG.error('Fatal error initializing placement client; ' + 'config is incorrect or incomplete: %s', e) + raise + except Exception as e: + # Unknown/unexpected errors here are fatal + LOG.error('Fatal error initializing placement client: %s', e) + raise super().__init__(service_name='scheduler', *args, **kwargs) + @property + def placement_client(self): + return report.report_client_singleton() + @periodic_task.periodic_task( spacing=CONF.scheduler.discover_hosts_in_cells_interval, run_immediately=True) diff --git a/nova/tests/unit/scheduler/test_manager.py b/nova/tests/unit/scheduler/test_manager.py index e7866069b3..e992fe6034 100644 --- a/nova/tests/unit/scheduler/test_manager.py +++ b/nova/tests/unit/scheduler/test_manager.py @@ -19,6 +19,7 @@ Tests For Scheduler from unittest import mock +from keystoneauth1 import exceptions as ks_exc import oslo_messaging as messaging from oslo_serialization import jsonutils from oslo_utils.fixture import uuidsentinel as uuids @@ -1688,6 +1689,41 @@ class SchedulerManagerTestCase(test.NoDBTestCase): mock_log_warning.assert_not_called() mock_log_debug.assert_called_once_with(msg) + @mock.patch('nova.scheduler.client.report.report_client_singleton') + @mock.patch.object(manager, 'LOG') + @mock.patch('nova.scheduler.host_manager.HostManager') + @mock.patch('nova.servicegroup.API') + @mock.patch('nova.rpc.get_notifier') + def test_init_lazy_placement_client(self, mock_rpc, mock_sg, mock_hm, + mock_log, mock_report): + # Simulate keystone or placement being offline at startup + mock_report.side_effect = ks_exc.RequestTimeout + mgr = manager.SchedulerManager() + 
mock_report.assert_called_once_with() + self.assertTrue(mock_log.warning.called) + + # Make sure we're raising the actual error to subsequent callers + self.assertRaises(ks_exc.RequestTimeout, lambda: mgr.placement_client) + + # Simulate recovery of the keystone or placement service + mock_report.reset_mock(side_effect=True) + mgr.placement_client + mock_report.assert_called_once_with() + + @mock.patch('nova.scheduler.client.report.report_client_singleton') + @mock.patch('nova.scheduler.host_manager.HostManager') + @mock.patch('nova.servicegroup.API') + @mock.patch('nova.rpc.get_notifier') + def test_init_lazy_placement_client_failures(self, mock_rpc, mock_sg, + mock_hm, mock_report): + # Certain keystoneclient exceptions are fatal + mock_report.side_effect = ks_exc.Unauthorized + self.assertRaises(ks_exc.Unauthorized, manager.SchedulerManager) + + # Anything else is fatal + mock_report.side_effect = test.TestingException + self.assertRaises(test.TestingException, manager.SchedulerManager) + class SchedulerManagerAllocationCandidateTestCase(test.NoDBTestCase): -- cgit v1.2.1