diff options
author | Zuul <zuul@review.opendev.org> | 2023-02-01 07:12:48 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2023-02-01 07:12:48 +0000 |
commit | e9d716f555f4340615226a4ae62449f79620ab91 (patch) | |
tree | 461a84039d63f2cc349d4e978027e6d28e309528 /nova | |
parent | c993d8d311474e8ba8a768e4db02e7898011cda8 (diff) | |
parent | e258164f5a6c9ce378fc9828b137d68e9adf5bfd (diff) | |
download | nova-e9d716f555f4340615226a4ae62449f79620ab91.tar.gz |
Merge "Detect host renames and abort startup"
Diffstat (limited to 'nova')
-rw-r--r-- | nova/compute/manager.py | 40 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_compute_mgr.py | 54 |
2 files changed, 82 insertions, 12 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 9a76e51b7a..8b4de4552d 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1536,6 +1536,22 @@ class ComputeManager(manager.Manager): nova.virt.node.write_local_node_uuid(db_nodes[0].uuid) + def _check_for_host_rename(self, nodes_by_uuid): + if 'ironic' in CONF.compute_driver.lower(): + # Ironic (currently) rebalances nodes at various times, and as + # such, nodes being discovered as assigned to this host with a + # different hostname is not surprising. Skip this check for + # ironic. + return + for node in nodes_by_uuid.values(): + if node.host != self.host: + raise exception.InvalidConfiguration( + 'My node %s has host %r but my host is %r; ' + 'Possible rename detected, refusing to start!' % ( + node.uuid, node.host, self.host)) + LOG.debug('Verified node %s matches my host %s', + node.uuid, self.host) + def init_host(self, service_ref): """Initialization for a standalone compute service.""" @@ -1574,15 +1590,6 @@ class ComputeManager(manager.Manager): raise exception.InvalidConfiguration(msg) self.driver.init_host(host=self.host) - context = nova.context.get_admin_context() - instances = objects.InstanceList.get_by_host( - context, self.host, - expected_attrs=['info_cache', 'metadata', 'numa_topology']) - - self.init_virt_events() - - self._validate_pinning_configuration(instances) - self._validate_vtpm_configuration(instances) # NOTE(gibi): At this point the compute_nodes of the resource tracker # has not been populated yet so we cannot rely on the resource tracker @@ -1593,8 +1600,23 @@ class ComputeManager(manager.Manager): # _destroy_evacuated_instances and # _error_out_instances_whose_build_was_interrupted out in the # background on startup + context = nova.context.get_admin_context() nodes_by_uuid = self._get_nodes(context) + # NOTE(danms): Check for a possible host rename and abort + # startup before we start mucking with instances we think are + # ours. 
+ self._check_for_host_rename(nodes_by_uuid) + + instances = objects.InstanceList.get_by_host( + context, self.host, + expected_attrs=['info_cache', 'metadata', 'numa_topology']) + + self.init_virt_events() + + self._validate_pinning_configuration(instances) + self._validate_vtpm_configuration(instances) + try: # checking that instance was not already evacuated to other host evacuated_instances = self._destroy_evacuated_instances( diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index fec13de7f0..7845f0ecc6 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -934,7 +934,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, inst_list = _make_instance_list(startup_instances) mock_host_get.return_value = inst_list our_node = objects.ComputeNode( - host='fake-host', uuid=uuids.our_node_uuid, + host=self.compute.host, uuid=uuids.our_node_uuid, hypervisor_hostname='fake-node') mock_get_nodes.return_value = {uuids.our_node_uuid: our_node} @@ -983,7 +983,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, """ mock_get_nodes.return_value = { uuids.cn_uuid1: objects.ComputeNode( - uuid=uuids.cn_uuid1, hypervisor_hostname='node1')} + uuid=uuids.cn_uuid1, hypervisor_hostname='node1', + host=self.compute.host)} self.compute.init_host(None) mock_error_interrupted.assert_called_once_with( @@ -1148,7 +1149,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, uuids.evac_instance: evacuating_instance } our_node = objects.ComputeNode( - host='fake-host', uuid=uuids.our_node_uuid, + host=self.compute.host, uuid=uuids.our_node_uuid, hypervisor_hostname='fake-node') mock_get_nodes.return_value = {uuids.our_node_uuid: our_node} @@ -1227,6 +1228,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, @mock.patch.object(objects.InstanceList, 'get_by_host', new=mock.Mock()) + @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids', + 
new=mock.Mock(return_value=[mock.MagicMock()])) @mock.patch('nova.compute.manager.ComputeManager.' '_validate_pinning_configuration') def test_init_host_pinning_configuration_validation_failure(self, @@ -1244,6 +1247,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, @mock.patch('nova.compute.manager.ComputeManager.' '_validate_pinning_configuration', new=mock.Mock()) + @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids', + new=mock.Mock(return_value=[mock.MagicMock()])) @mock.patch('nova.compute.manager.ComputeManager.' '_validate_vtpm_configuration') def test_init_host_vtpm_configuration_validation_failure(self, @@ -6434,6 +6439,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, mock.sentinel.service_ref) m.assert_called_once_with(mock.sentinel.service_ref) + def test_check_for_host_rename_ironic(self): + self.flags(compute_driver='ironic') + # Passing a node whose host does not match ours makes sure we take + # the early exit because of our virt driver (otherwise this would + # raise InvalidConfiguration) + nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1, + host='not-this-host')} + self.compute._check_for_host_rename(nodes) + + def test_check_for_host_rename_renamed_only(self): + nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1, + host='not-this-host')} + self.assertRaises(exception.InvalidConfiguration, + self.compute._check_for_host_rename, nodes) + + def test_check_for_host_rename_renamed_one(self): + nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1, + host=self.compute.host), + uuids.node2: mock.MagicMock(uuid=uuids.node2, + host='not-this-host')} + self.assertRaises(exception.InvalidConfiguration, + self.compute._check_for_host_rename, nodes) + + def test_check_for_host_rename_not_renamed(self): + nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1, + host=self.compute.host)} + with mock.patch.object(manager.LOG, 'debug') as mock_debug: + self.compute._check_for_host_rename(nodes) + mock_debug.assert_called_once_with( + 'Verified node %s matches my host %s', + uuids.node1, self.compute.host) + + 
@mock.patch('nova.compute.manager.ComputeManager._get_nodes') + def test_check_for_host_rename_called_by_init_host(self, mock_nodes): + # Since testing init_host() requires a billion mocks, this + # tests that we do call it when expected, but make it raise + # to avoid running the rest of init_host(). + with mock.patch.object(self.compute, + '_check_for_host_rename') as m: + m.side_effect = test.TestingException + self.assertRaises(test.TestingException, + self.compute.init_host, None) + m.assert_called_once_with(mock_nodes.return_value) + class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): def setUp(self): |