summaryrefslogtreecommitdiff
path: root/nova
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2023-02-01 07:12:48 +0000
committerGerrit Code Review <review@openstack.org>2023-02-01 07:12:48 +0000
commite9d716f555f4340615226a4ae62449f79620ab91 (patch)
tree461a84039d63f2cc349d4e978027e6d28e309528 /nova
parentc993d8d311474e8ba8a768e4db02e7898011cda8 (diff)
parente258164f5a6c9ce378fc9828b137d68e9adf5bfd (diff)
downloadnova-e9d716f555f4340615226a4ae62449f79620ab91.tar.gz
Merge "Detect host renames and abort startup"
Diffstat (limited to 'nova')
-rw-r--r--nova/compute/manager.py40
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py54
2 files changed, 82 insertions, 12 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index 9a76e51b7a..8b4de4552d 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1536,6 +1536,22 @@ class ComputeManager(manager.Manager):
nova.virt.node.write_local_node_uuid(db_nodes[0].uuid)
+ def _check_for_host_rename(self, nodes_by_uuid):
+ if 'ironic' in CONF.compute_driver.lower():
+ # Ironic (currently) rebalances nodes at various times, and as
+ # such, nodes being discovered as assigned to this host with a
+ # different hostname is not surprising. Skip this check for
+ # ironic.
+ return
+ for node in nodes_by_uuid.values():
+ if node.host != self.host:
+ raise exception.InvalidConfiguration(
+ 'My node %s has host %r but my host is %r; '
+ 'Possible rename detected, refusing to start!' % (
+ node.uuid, node.host, self.host))
+ LOG.debug('Verified node %s matches my host %s',
+ node.uuid, self.host)
+
def init_host(self, service_ref):
"""Initialization for a standalone compute service."""
@@ -1574,15 +1590,6 @@ class ComputeManager(manager.Manager):
raise exception.InvalidConfiguration(msg)
self.driver.init_host(host=self.host)
- context = nova.context.get_admin_context()
- instances = objects.InstanceList.get_by_host(
- context, self.host,
- expected_attrs=['info_cache', 'metadata', 'numa_topology'])
-
- self.init_virt_events()
-
- self._validate_pinning_configuration(instances)
- self._validate_vtpm_configuration(instances)
# NOTE(gibi): At this point the compute_nodes of the resource tracker
# has not been populated yet so we cannot rely on the resource tracker
@@ -1593,8 +1600,23 @@ class ComputeManager(manager.Manager):
# _destroy_evacuated_instances and
# _error_out_instances_whose_build_was_interrupted out in the
# background on startup
+ context = nova.context.get_admin_context()
nodes_by_uuid = self._get_nodes(context)
+ # NOTE(danms): Check for a possible host rename and abort
+ # startup before we start mucking with instances we think are
+ # ours.
+ self._check_for_host_rename(nodes_by_uuid)
+
+ instances = objects.InstanceList.get_by_host(
+ context, self.host,
+ expected_attrs=['info_cache', 'metadata', 'numa_topology'])
+
+ self.init_virt_events()
+
+ self._validate_pinning_configuration(instances)
+ self._validate_vtpm_configuration(instances)
+
try:
# checking that instance was not already evacuated to other host
evacuated_instances = self._destroy_evacuated_instances(
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index fec13de7f0..7845f0ecc6 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -934,7 +934,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
inst_list = _make_instance_list(startup_instances)
mock_host_get.return_value = inst_list
our_node = objects.ComputeNode(
- host='fake-host', uuid=uuids.our_node_uuid,
+ host=self.compute.host, uuid=uuids.our_node_uuid,
hypervisor_hostname='fake-node')
mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
@@ -983,7 +983,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
"""
mock_get_nodes.return_value = {
uuids.cn_uuid1: objects.ComputeNode(
- uuid=uuids.cn_uuid1, hypervisor_hostname='node1')}
+ uuid=uuids.cn_uuid1, hypervisor_hostname='node1',
+ host=self.compute.host)}
self.compute.init_host(None)
mock_error_interrupted.assert_called_once_with(
@@ -1148,7 +1149,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
uuids.evac_instance: evacuating_instance
}
our_node = objects.ComputeNode(
- host='fake-host', uuid=uuids.our_node_uuid,
+ host=self.compute.host, uuid=uuids.our_node_uuid,
hypervisor_hostname='fake-node')
mock_get_nodes.return_value = {uuids.our_node_uuid: our_node}
@@ -1227,6 +1228,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
@mock.patch.object(objects.InstanceList, 'get_by_host',
new=mock.Mock())
+ @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+ new=mock.Mock(return_value=[mock.MagicMock()]))
@mock.patch('nova.compute.manager.ComputeManager.'
'_validate_pinning_configuration')
def test_init_host_pinning_configuration_validation_failure(self,
@@ -1244,6 +1247,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
@mock.patch('nova.compute.manager.ComputeManager.'
'_validate_pinning_configuration',
new=mock.Mock())
+ @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids',
+ new=mock.Mock(return_value=[mock.MagicMock()]))
@mock.patch('nova.compute.manager.ComputeManager.'
'_validate_vtpm_configuration')
def test_init_host_vtpm_configuration_validation_failure(self,
@@ -6434,6 +6439,49 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock.sentinel.service_ref)
m.assert_called_once_with(mock.sentinel.service_ref)
+ def test_check_for_host_rename_ironic(self):
+ self.flags(compute_driver='ironic')
+        # The node's host is mismatched, so this only passes because the
+        # ironic virt driver takes the early exit before checking hosts
+ nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+ host='not-this-host')}
+ self.compute._check_for_host_rename(nodes)
+
+ def test_check_for_host_rename_renamed_only(self):
+ nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+ host='not-this-host')}
+ self.assertRaises(exception.InvalidConfiguration,
+ self.compute._check_for_host_rename, nodes)
+
+ def test_check_for_host_rename_renamed_one(self):
+ nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+ host=self.compute.host),
+ uuids.node2: mock.MagicMock(uuid=uuids.node2,
+ host='not-this-host')}
+ self.assertRaises(exception.InvalidConfiguration,
+ self.compute._check_for_host_rename, nodes)
+
+ def test_check_for_host_rename_not_renamed(self):
+ nodes = {uuids.node1: mock.MagicMock(uuid=uuids.node1,
+ host=self.compute.host)}
+ with mock.patch.object(manager.LOG, 'debug') as mock_debug:
+ self.compute._check_for_host_rename(nodes)
+ mock_debug.assert_called_once_with(
+ 'Verified node %s matches my host %s',
+ uuids.node1, self.compute.host)
+
+ @mock.patch('nova.compute.manager.ComputeManager._get_nodes')
+ def test_check_for_host_rename_called_by_init_host(self, mock_nodes):
+ # Since testing init_host() requires a billion mocks, this
+ # tests that we do call it when expected, but make it raise
+ # to avoid running the rest of init_host().
+ with mock.patch.object(self.compute,
+ '_check_for_host_rename') as m:
+ m.side_effect = test.TestingException
+ self.assertRaises(test.TestingException,
+ self.compute.init_host, None)
+ m.assert_called_once_with(mock_nodes.return_value)
+
class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
def setUp(self):