diff options
author | Zuul <zuul@review.opendev.org> | 2023-01-30 18:29:23 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2023-01-30 18:29:23 +0000 |
commit | 01b30972ad1416f1ecee6300e0585fa82644e56e (patch) | |
tree | 95e32d60d8e82f05fccaf26893494a7a0ec0e995 | |
parent | e3b29100237305456830cf2dbee9c1e0718c7319 (diff) | |
parent | 53a925ff0f79fe822a96e1fadf552bb8d0f8e4b4 (diff) | |
download | nova-01b30972ad1416f1ecee6300e0585fa82644e56e.tar.gz |
Merge "Persist existing node uuids locally"
-rw-r--r-- | nova/compute/manager.py | 49 | ||||
-rw-r--r-- | nova/objects/service.py | 9 | ||||
-rw-r--r-- | nova/tests/unit/compute/test_compute_mgr.py | 136 |
3 files changed, 184 insertions, 10 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 1f61ad4266..aca6af3ed1 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -84,6 +84,7 @@ from nova.objects import external_event as external_event_obj from nova.objects import fields from nova.objects import instance as obj_instance from nova.objects import migrate_data as migrate_data_obj +from nova.objects import service as service_obj from nova.pci import request as pci_req_module from nova.pci import whitelist from nova import safe_utils @@ -1492,9 +1493,57 @@ class ComputeManager(manager.Manager): "then you can ignore this warning.", node_name) return nodes_by_uuid + def _ensure_existing_node_identity(self, service_ref): + """If we are upgrading from an older service version, we need + to write our node identity uuid (if not already done) based on + nodes assigned to us in the database. + """ + if service_ref.version >= service_obj.NODE_IDENTITY_VERSION: + # Already new enough, nothing to do here + return + + if 'ironic' in CONF.compute_driver.lower(): + # We do not persist a single local node identity for + # ironic + return + + if nova.virt.node.read_local_node_uuid(): + # We already have a local node identity, no migration needed + return + + context = nova.context.get_admin_context() + db_nodes = objects.ComputeNodeList.get_all_by_host(context, self.host) + if not db_nodes: + # This means we have no nodes in the database (that we + # know of) and thus have no need to record an existing + # UUID. That is probably strange, so log a warning. + raise exception.InvalidConfiguration( + ('Upgrading from service version %i but found no ' + 'nodes in the database for host %s to persist ' + 'locally; Possible rename detected, ' + 'refusing to start!') % ( + service_ref.version, self.host)) + + if len(db_nodes) > 1: + # If this happens we can't do the right thing, so raise an + # exception to abort host startup + LOG.warning('Multiple nodes found in the database for host %s; ' + 'unable to persist local node identity automatically') + raise exception.InvalidConfiguration( + 'Multiple nodes found in database, manual node uuid ' + 'configuration required') + + nova.virt.node.write_local_node_uuid(db_nodes[0].uuid) + def init_host(self, service_ref): """Initialization for a standalone compute service.""" + if service_ref: + # If we are an existing service, check to see if we need + # to record a locally-persistent node identity because + # we have upgraded from a previous version. + self._ensure_existing_node_identity(service_ref) + if CONF.pci.device_spec: # Simply loading the PCI passthrough spec will do a bunch of # validation that would otherwise wait until the PciDevTracker is diff --git a/nova/objects/service.py b/nova/objects/service.py index 71361e0168..6ae3f6a768 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__) # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 64 +SERVICE_VERSION = 65 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any @@ -225,8 +225,15 @@ SERVICE_VERSION_HISTORY = ( # Version 64: Compute RPC v6.1: # Add reimage_boot_volume parameter to rebuild_instance() {'compute_rpc': '6.1'}, + # Version 65: Compute RPC v6.1: + # Added stable local node identity + {'compute_rpc': '6.1'}, ) +# This is the version after which we can rely on having a persistent +# local node identity for single-node systems. +NODE_IDENTITY_VERSION = 65 + # This is used to raise an error at service startup if older than N-1 computes # are detected. Update this at the beginning of every release cycle to point to # the smallest service version that was added in N-1. diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index b1f89ce5cc..c9e57fccfe 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -57,6 +57,7 @@ from nova.objects import fields from nova.objects import instance as instance_obj from nova.objects import migrate_data as migrate_data_obj from nova.objects import network_request as net_req_obj +from nova.objects import service as service_obj from nova.pci import request as pci_request from nova.scheduler.client import report from nova import test @@ -76,6 +77,7 @@ from nova.virt import driver as virt_driver from nova.virt import event as virtevent from nova.virt import fake as fake_driver from nova.virt import hardware +from nova.virt import node as virt_node from nova.volume import cinder @@ -906,6 +908,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, return instance_obj._make_instance_list( self.context, objects.InstanceList(), db_list, None) + @mock.patch.object(manager.ComputeManager, + '_ensure_existing_node_identity') @mock.patch.object(manager.ComputeManager, '_get_nodes') @mock.patch.object(manager.ComputeManager, '_error_out_instances_whose_build_was_interrupted') @@ -924,7 +928,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, mock_validate_vtpm, mock_validate_pinning, mock_destroy, mock_admin_ctxt, mock_host_get, mock_init_host, - mock_error_interrupted, mock_get_nodes): + mock_error_interrupted, mock_get_nodes, + mock_existing_node): mock_admin_ctxt.return_value = self.context inst_list = _make_instance_list(startup_instances) mock_host_get.return_value = inst_list @@ -935,6 +940,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.compute.init_host(None) + mock_existing_node.assert_not_called() mock_validate_pinning.assert_called_once_with(inst_list) mock_validate_vtpm.assert_called_once_with(inst_list) mock_destroy.assert_called_once_with( @@ -988,13 +994,16 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, @mock.patch('nova.objects.InstanceList') @mock.patch('nova.objects.MigrationList.get_by_filters') - def test_cleanup_host(self, mock_miglist_get, mock_instance_list): + @mock.patch('nova.objects.ComputeNodeList.get_all_by_uuids') + def test_cleanup_host(self, mock_cnlist_get, mock_miglist_get, + mock_instance_list): # just testing whether the cleanup_host method # when fired will invoke the underlying driver's # equivalent method. mock_miglist_get.return_value = [] mock_instance_list.get_by_host.return_value = [] + mock_cnlist_get.return_value = [] with mock.patch.object(self.compute, 'driver') as mock_driver: self.compute.init_host(None) @@ -1154,19 +1163,22 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, @mock.patch.object(objects.ComputeNode, 'get_by_host_and_nodename') @mock.patch.object(fake_driver.FakeDriver, 'get_available_nodes') def test_get_nodes(self, mock_driver_get_nodes, mock_get_by_host_and_node): - mock_driver_get_nodes.return_value = ['fake-node1', 'fake-node2'] - cn1 = objects.ComputeNode(uuid=uuids.cn1) - cn2 = objects.ComputeNode(uuid=uuids.cn2) + mock_driver_get_nodes.return_value = ['fake_node1', 'fake_node2'] + # NOTE(danms): The fake driver, by default, uses + # uuidsentinel.$node_name, so we can predict the uuids it will + # return here. + cn1 = objects.ComputeNode(uuid=uuids.fake_node1) + cn2 = objects.ComputeNode(uuid=uuids.fake_node2) mock_get_by_host_and_node.side_effect = [cn1, cn2] nodes = self.compute._get_nodes(self.context) - self.assertEqual({uuids.cn1: cn1, uuids.cn2: cn2}, nodes) + self.assertEqual({uuids.fake_node1: cn1, uuids.fake_node2: cn2}, nodes) mock_driver_get_nodes.assert_called_once_with() mock_get_by_host_and_node.assert_has_calls([ - mock.call(self.context, self.compute.host, 'fake-node1'), - mock.call(self.context, self.compute.host, 'fake-node2'), + mock.call(self.context, self.compute.host, 'fake_node1'), + mock.call(self.context, self.compute.host, 'fake_node2') ]) @mock.patch.object(manager.LOG, 'warning') @@ -1206,7 +1218,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, mock_driver_get_nodes.assert_called_once_with() mock_get_by_host_and_node.assert_has_calls([ mock.call(self.context, self.compute.host, 'fake-node1'), - mock.call(self.context, self.compute.host, 'fake-node2'), + mock.call(self.context, self.compute.host, 'fake-node2') ]) mock_log_warning.assert_called_once_with( "Compute node %s not found in the database. If this is the first " @@ -6321,6 +6333,112 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase, self.assertEqual({'one-image': 'cached', 'two-image': 'existing'}, r) + @mock.patch.object(virt_node, 'write_local_node_uuid') + def test_ensure_node_uuid_not_needed_version(self, mock_node): + # Make sure an up-to-date service bypasses the persistence + service_ref = service_obj.Service() + self.assertEqual(service_obj.SERVICE_VERSION, service_ref.version) + mock_node.assert_not_called() + self.compute._ensure_existing_node_identity(service_ref) + mock_node.assert_not_called() + + @mock.patch.object(virt_node, 'write_local_node_uuid') + def test_ensure_node_uuid_not_needed_ironic(self, mock_node): + # Make sure an old service for ironic does not write a local node uuid + with mock.patch.object(service_obj, 'SERVICE_VERSION', new=60): + service_ref = service_obj.Service() + self.assertEqual(60, service_ref.version) + self.flags(compute_driver='ironic') + self.compute._ensure_existing_node_identity(service_ref) + mock_node.assert_not_called() + + @mock.patch.object(virt_node, 'write_local_node_uuid') + @mock.patch.object(virt_node, 'read_local_node_uuid') + def test_ensure_node_uuid_not_needed_preprovisioned(self, + mock_read_node, + mock_write_node): + # Make sure an old service does not write a uuid if one is present + with mock.patch.object(service_obj, 'SERVICE_VERSION', new=60): + service_ref = service_obj.Service() + self.assertEqual(60, service_ref.version) + mock_read_node.return_value = str(uuids.SOME_UUID) + self.compute._ensure_existing_node_identity(service_ref) + mock_write_node.assert_not_called() + + @mock.patch.object(virt_node, 'write_local_node_uuid') + @mock.patch.object(virt_node, 'read_local_node_uuid') + @mock.patch('nova.objects.ComputeNodeList.get_all_by_host') + def test_ensure_node_uuid_upgrade_no_node(self, mock_get_cn, + mock_read_node, + mock_write_node): + # If we are not a new service, we have no pre-provisioned node uuid + # and we find no nodes in the database, we do not write a local + # node uuid *and* we abort startup since something is likely wrong. + with mock.patch.object(service_obj, 'SERVICE_VERSION', new=60): + service_ref = service_obj.Service() + self.assertEqual(60, service_ref.version) + mock_read_node.return_value = None + mock_get_cn.return_value = [] + self.assertRaises(exception.InvalidConfiguration, + self.compute._ensure_existing_node_identity, + service_ref) + mock_get_cn.assert_called_once_with(mock.ANY, self.compute.host) + mock_write_node.assert_not_called() + + @mock.patch.object(virt_node, 'write_local_node_uuid') + @mock.patch.object(virt_node, 'read_local_node_uuid') + @mock.patch('nova.objects.ComputeNodeList.get_all_by_host') + def test_ensure_node_uuid_upgrade_multi_node(self, mock_get_cn, + mock_read_node, + mock_write_node): + # If we are not a new service, we have no pre-provisioned node uuid + # and we find multiple nodes in the database, we do not write a local + # node uuid *and* we abort startup since something is likely wrong. + with mock.patch.object(service_obj, 'SERVICE_VERSION', new=60): + service_ref = service_obj.Service() + self.assertEqual(60, service_ref.version) + mock_read_node.return_value = None + mock_get_cn.return_value = [1, 2] + self.assertRaises(exception.InvalidConfiguration, + self.compute._ensure_existing_node_identity, + service_ref) + mock_get_cn.assert_called_once_with(mock.ANY, self.compute.host) + mock_write_node.assert_not_called() + + @mock.patch.object(virt_node, 'write_local_node_uuid') + @mock.patch.object(virt_node, 'read_local_node_uuid') + @mock.patch('nova.objects.ComputeNodeList.get_all_by_host') + def test_ensure_node_uuid_upgrade_writes_node_uuid(self, mock_get_cn, + mock_read_node, + mock_write_node): + # If we are not a new service, there is no pre-provisioned local + # compute node uuid, and we find exactly one compute node in the + # database for our host, we persist that. + with mock.patch.object(service_obj, 'SERVICE_VERSION', new=60): + service_ref = service_obj.Service() + self.assertEqual(60, service_ref.version) + mock_read_node.return_value = None + mock_get_cn.return_value = [ + objects.ComputeNode(uuid=str(uuids.compute)), + ] + self.compute._ensure_existing_node_identity(service_ref) + mock_get_cn.assert_called_once_with(mock.ANY, self.compute.host) + mock_write_node.assert_called_once_with(str(uuids.compute)) + + def test_ensure_node_uuid_called_by_init_host(self): + # test_init_host() above ensures that we do not call + # _ensure_existing_node_identity() in the service_ref=None case. + # Since testing init_host() requires a billion mocks, this + # tests that we do call it when expected, but make it raise + # to avoid running the rest of init_host(). + with mock.patch.object(self.compute, + '_ensure_existing_node_identity') as m: + m.side_effect = test.TestingException + self.assertRaises(test.TestingException, + self.compute.init_host, + mock.sentinel.service_ref) + m.assert_called_once_with(mock.sentinel.service_ref) + class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): def setUp(self): |