summaryrefslogtreecommitdiff
path: root/nova/tests/functional/test_servers.py
diff options
context:
space:
mode:
authorMatt Riedemann <mriedem.os@gmail.com>2019-12-13 17:47:42 -0500
committerMatt Riedemann <mriedem.os@gmail.com>2019-12-16 17:02:29 -0500
commit95c9d710dd6a06b8d2fcd941343075872cdf95d5 (patch)
tree07d1b40e62368324fe7d32b5087e8639711c9c59 /nova/tests/functional/test_servers.py
parentd052e72040258b6ab91f53b43e8d45b330b898e1 (diff)
downloadnova-95c9d710dd6a06b8d2fcd941343075872cdf95d5.tar.gz
Add recreate test for bug 1855927
When _poll_unconfirmed_resizes runs, if the resize confirm fails because the request never reaches the source compute, the migration may be stuck in "confirming" status. Subsequent runs of _poll_unconfirmed_resizes will not be able to auto-confirm the resize, nor will the user be able to manually confirm the resize. An admin could reset the status on the server to ACTIVE or ERROR, but that means the source compute never gets cleaned up, since you can only confirm or revert a resize on a server with VERIFY_RESIZE status. This adds a functional recreate test for the bug. Note that if the request does get to the source compute and fails, the _error_out_instance_on_exception context manager should set the instance to ERROR status and the errors_out_migration decorator should set the migration status to "error". The problem is that if we never get that far, we can't recover automatically or via the API without operator intervention. Change-Id: I93e27d907a237ae16b6b4c7202ad157b067aa30e Related-Bug: #1855927
Diffstat (limited to 'nova/tests/functional/test_servers.py')
-rw-r--r--nova/tests/functional/test_servers.py66
1 files changed, 66 insertions, 0 deletions
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index f46477864a..5e4029fa57 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -27,6 +27,7 @@ from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import base64
from oslo_serialization import jsonutils
+from oslo_utils import fixture as osloutils_fixture
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
import six
@@ -46,6 +47,7 @@ from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional import integrated_helpers
from nova.tests.unit.api.openstack import fakes
+from nova.tests.unit import cast_as_call
from nova.tests.unit import fake_block_device
from nova.tests.unit import fake_notifier
from nova.tests.unit import fake_requests
@@ -3395,6 +3397,70 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
self._delete_and_check_allocations(server)
+class PollUnconfirmedResizesTest(integrated_helpers.ProviderUsageBaseTestCase):
+ """Tests for the _poll_unconfirmed_resizes periodic task."""
+ compute_driver = 'fake.MediumFakeDriver'
+
+ def setUp(self):
+ super(PollUnconfirmedResizesTest, self).setUp()
+ # Start two computes for the resize.
+ self._start_compute('host1')
+ self._start_compute('host2')
+
+ def test_source_host_down_during_confirm(self):
+ """Tests the scenario where the source compute service goes down
+ between the server reaching VERIFY_RESIZE status and the
+ _poll_unconfirmed_resizes periodic task running, so the confirm fails.
+ """
+ server = self._build_minimal_create_server_request(
+ name='test_source_host_down_during_confirm',
+ image_uuid=nova.tests.unit.image.fake.get_valid_image_id(),
+ networks='none', host='host1')
+ server = self.api.post_server({'server': server})
+ server = self._wait_for_state_change(server, 'ACTIVE')
+ # Cold migrate the server to the other host.
+ self.api.post_server_action(server['id'], {'migrate': None})
+ server = self._wait_for_state_change(server, 'VERIFY_RESIZE')
+ self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host'])
+ # Make sure the migration is finished.
+ self._wait_for_migration_status(server, ['finished'])
+ # Stop and force down the source compute service.
+ self.computes['host1'].stop()
+ source_service = self.api.get_services(
+ binary='nova-compute', host='host1')[0]
+ self.api.put_service(
+ source_service['id'], {'status': 'disabled', 'forced_down': True})
+ # Now configure auto-confirm and call the method on the target compute
+ # so we do not have to wait for the periodic to run. Also configure
+ # the RPC call from the API to the source compute to time out after 1
+ # second.
+ self.flags(resize_confirm_window=1, rpc_response_timeout=1)
+ # Stub timeutils so the DB API query finds the unconfirmed migration.
+ future = timeutils.utcnow() + datetime.timedelta(hours=1)
+ ctxt = context.get_admin_context()
+ with osloutils_fixture.TimeFixture(future):
+ # Use the CastAsCall fixture since the fake rpc is not going to
+ # simulate a failure from the service being down.
+ with cast_as_call.CastAsCall(self):
+ self.computes['host2'].manager._poll_unconfirmed_resizes(ctxt)
+ self.assertIn('Error auto-confirming resize',
+ self.stdlog.logger.output)
+ self.assertIn('No reply on topic compute', self.stdlog.logger.output)
+ # FIXME(mriedem): This is bug 1855927 where the migration status is
+ # left in "confirming" so the _poll_unconfirmed_resizes task will not
+ # process it again nor can the user confirm the resize in the API since
+ # the migration status is not "finished".
+ self._wait_for_migration_status(server, ['confirming'])
+ ex = self.assertRaises(client.OpenStackApiException,
+ self.api.post_server_action,
+ server['id'], {'confirmResize': None})
+ self.assertEqual(400, ex.response.status_code)
+ self.assertIn('Instance has not been resized', six.text_type(ex))
+ # That error message is bogus because the server is resized.
+ server = self.api.get_server(server['id'])
+ self.assertEqual('VERIFY_RESIZE', server['status'])
+
+
class ServerLiveMigrateForceAndAbort(
integrated_helpers.ProviderUsageBaseTestCase):
"""Test Server live migrations, which delete the migration or