summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Smith <dansmith@redhat.com>2023-01-31 07:15:40 -0800
committerDan Smith <dansmith@redhat.com>2023-02-01 09:20:59 -0800
commitd892905904fd8ed7b5bb916ca4a119997ea0c9a7 (patch)
treeddc851608b9ed6c43431e77a08af4eaedc973957
parent72370a188c0755bc9c864b5a5e4a972077cb8dd6 (diff)
downloadnova-d892905904fd8ed7b5bb916ca4a119997ea0c9a7.tar.gz
Protect against a deleted node id file
If we are starting up for the first time, we expect to generate and write a node_uuid file if one does not exist. If we are upgrading, we expect to do the same. However, if we are starting up not after an upgrade and not for the first time, a missing compute_id file is an error, and we should abort. Because of the additional check that this adds, which is called from a variety of places that don't have the stable compute node singleton stubbed to make it happy, this mocks the check for any test that does not specifically aim to exercise it. Related to blueprint stable-compute-uuid Change-Id: If83ce14b96e7d84ae38eba9d798754557d5abdfd
-rw-r--r--nova/compute/manager.py14
-rw-r--r--nova/test.py9
-rw-r--r--nova/tests/fixtures/nova.py4
-rw-r--r--nova/tests/unit/compute/test_compute_mgr.py23
4 files changed, 42 insertions, 8 deletions
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index f59fd82d10..2aad5c77fb 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -1509,15 +1509,21 @@ class ComputeManager(manager.Manager):
to write our node identity uuid (if not already done) based on
nodes assigned to us in the database.
"""
- if service_ref.version >= service_obj.NODE_IDENTITY_VERSION:
- # Already new enough, nothing to do here
- return
-
if 'ironic' in CONF.compute_driver.lower():
# We do not persist a single local node identity for
# ironic
return
+ if service_ref.version >= service_obj.NODE_IDENTITY_VERSION:
+ # Already new enough, nothing to do here, but make sure that we
+ # have a UUID file already, as this is not our first time starting.
+ if nova.virt.node.read_local_node_uuid() is None:
+ raise exception.InvalidConfiguration(
+ ('No local node identity found, but this is not our '
+ 'first startup on this host. Refusing to start after '
+ 'potentially having lost that state!'))
+ return
+
if nova.virt.node.read_local_node_uuid():
# We already have a local node identity, no migration needed
return
diff --git a/nova/test.py b/nova/test.py
index 562bd2516e..0f7965ea33 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -171,6 +171,12 @@ class TestCase(base.BaseTestCase):
# base class when USES_DB is True.
NUMBER_OF_CELLS = 1
+ # The stable compute id stuff is intentionally singleton-ish, which makes
+ # it a nightmare for testing multiple host/node combinations in tests like
+ # we do. So, mock it out by default, unless the test is specifically
+ # designed to handle it.
+ STUB_COMPUTE_ID = True
+
def setUp(self):
"""Run before each test method to initialize test environment."""
# Ensure BaseTestCase's ConfigureLogging fixture is disabled since
@@ -301,7 +307,8 @@ class TestCase(base.BaseTestCase):
# Reset our local node uuid cache (and avoid writing to the
# local filesystem when we generate a new one).
- self.useFixture(nova_fixtures.ComputeNodeIdFixture())
+ if self.STUB_COMPUTE_ID:
+ self.useFixture(nova_fixtures.ComputeNodeIdFixture())
def _setup_cells(self):
"""Setup a normal cellsv2 environment.
diff --git a/nova/tests/fixtures/nova.py b/nova/tests/fixtures/nova.py
index 76dc63755d..5fd893e7dc 100644
--- a/nova/tests/fixtures/nova.py
+++ b/nova/tests/fixtures/nova.py
@@ -1863,3 +1863,7 @@ class ComputeNodeIdFixture(fixtures.Fixture):
self.useFixture(fixtures.MockPatch(
'nova.virt.node.write_local_node_uuid',
lambda uuid: None))
+ self.useFixture(fixtures.MockPatch(
+ 'nova.compute.manager.ComputeManager.'
+ '_ensure_existing_node_identity',
+ mock.DEFAULT))
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index c5fed20377..16de724a42 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -91,6 +91,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
# os-brick>=5.1 now uses external file system locks instead of internal
# locks so we need to set up locking
REQUIRES_LOCKING = True
+ STUB_COMPUTE_ID = False
def setUp(self):
super(ComputeManagerUnitTestCase, self).setUp()
@@ -6361,13 +6362,15 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
'two-image': 'existing'}, r)
@mock.patch.object(virt_node, 'write_local_node_uuid')
- def test_ensure_node_uuid_not_needed_version(self, mock_node):
+ @mock.patch.object(virt_node, 'read_local_node_uuid')
+ def test_ensure_node_uuid_not_needed_version(self, mock_read, mock_write):
# Make sure an up-to-date service bypasses the persistence
service_ref = service_obj.Service()
self.assertEqual(service_obj.SERVICE_VERSION, service_ref.version)
- mock_node.assert_not_called()
+ mock_read.return_value = 'not none'
+ mock_write.assert_not_called()
self.compute._ensure_existing_node_identity(service_ref)
- mock_node.assert_not_called()
+ mock_write.assert_not_called()
@mock.patch.object(virt_node, 'write_local_node_uuid')
def test_ensure_node_uuid_not_needed_ironic(self, mock_node):
@@ -6452,6 +6455,20 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase,
mock_get_cn.assert_called_once_with(mock.ANY, self.compute.host)
mock_write_node.assert_called_once_with(str(uuids.compute))
+ @mock.patch.object(virt_node, 'read_local_node_uuid')
+ def test_ensure_node_uuid_missing_file_ironic(self, mock_read):
+ mock_service = mock.MagicMock(
+ version=service_obj.NODE_IDENTITY_VERSION)
+ mock_read.return_value = None
+ self.assertRaises(exception.InvalidConfiguration,
+ self.compute._ensure_existing_node_identity,
+ mock_service)
+ mock_read.assert_called_once_with()
+
+ # Now make sure that ironic causes this exact configuration to pass
+ self.flags(compute_driver='ironic')
+ self.compute._ensure_existing_node_identity(mock_service)
+
def test_ensure_node_uuid_called_by_init_host(self):
# test_init_host() above ensures that we do not call
# _ensure_existing_node_identity() in the service_ref=None case.