diff options
author | Matt Riedemann <mriedem.os@gmail.com> | 2019-12-13 17:47:42 -0500 |
---|---|---|
committer | Matt Riedemann <mriedem.os@gmail.com> | 2019-12-16 17:02:29 -0500 |
commit | 95c9d710dd6a06b8d2fcd941343075872cdf95d5 (patch) | |
tree | 07d1b40e62368324fe7d32b5087e8639711c9c59 /nova/tests/functional/test_servers.py | |
parent | d052e72040258b6ab91f53b43e8d45b330b898e1 (diff) | |
download | nova-95c9d710dd6a06b8d2fcd941343075872cdf95d5.tar.gz |
Add recreate test for bug 1855927
When _poll_unconfirmed_resizes runs, if the resize confirm
request fails before it ever reaches the source compute host,
the migration may be left stuck in "confirming" status.
Subsequent runs of _poll_unconfirmed_resizes
will not be able to auto-confirm the resize nor will the user
be able to manually confirm the resize. An admin could reset
the status on the server to ACTIVE or ERROR but that means
the source compute never gets cleaned up since you can only
confirm or revert a resize on a server with VERIFY_RESIZE status.
This adds a functional recreate test for the bug.
Note that if the request does get to the source compute and fails,
the _error_out_instance_on_exception context manager should set
the instance to ERROR status and the errors_out_migration decorator
should set the migration status to "error". The problem is that if
we never get that far, we cannot recover automatically or via the
API without operator intervention.
Change-Id: I93e27d907a237ae16b6b4c7202ad157b067aa30e
Related-Bug: #1855927
Diffstat (limited to 'nova/tests/functional/test_servers.py')
-rw-r--r-- | nova/tests/functional/test_servers.py | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index f46477864a..5e4029fa57 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -27,6 +27,7 @@ from oslo_config import cfg from oslo_log import log as logging from oslo_serialization import base64 from oslo_serialization import jsonutils +from oslo_utils import fixture as osloutils_fixture from oslo_utils.fixture import uuidsentinel as uuids from oslo_utils import timeutils import six @@ -46,6 +47,7 @@ from nova.tests import fixtures as nova_fixtures from nova.tests.functional.api import client from nova.tests.functional import integrated_helpers from nova.tests.unit.api.openstack import fakes +from nova.tests.unit import cast_as_call from nova.tests.unit import fake_block_device from nova.tests.unit import fake_notifier from nova.tests.unit import fake_requests @@ -3395,6 +3397,70 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase): self._delete_and_check_allocations(server) +class PollUnconfirmedResizesTest(integrated_helpers.ProviderUsageBaseTestCase): + """Tests for the _poll_unconfirmed_resizes periodic task.""" + compute_driver = 'fake.MediumFakeDriver' + + def setUp(self): + super(PollUnconfirmedResizesTest, self).setUp() + # Start two computes for the resize. + self._start_compute('host1') + self._start_compute('host2') + + def test_source_host_down_during_confirm(self): + """Tests the scenario that between the time that the server goes to + VERIFY_RESIZE status and the _poll_unconfirmed_resizes periodic task + runs the source compute service goes down so the confirm task fails. 
+ """ + server = self._build_minimal_create_server_request( + name='test_source_host_down_during_confirm', + image_uuid=nova.tests.unit.image.fake.get_valid_image_id(), + networks='none', host='host1') + server = self.api.post_server({'server': server}) + server = self._wait_for_state_change(server, 'ACTIVE') + # Cold migrate the server to the other host. + self.api.post_server_action(server['id'], {'migrate': None}) + server = self._wait_for_state_change(server, 'VERIFY_RESIZE') + self.assertEqual('host2', server['OS-EXT-SRV-ATTR:host']) + # Make sure the migration is finished. + self._wait_for_migration_status(server, ['finished']) + # Stop and force down the source compute service. + self.computes['host1'].stop() + source_service = self.api.get_services( + binary='nova-compute', host='host1')[0] + self.api.put_service( + source_service['id'], {'status': 'disabled', 'forced_down': True}) + # Now configure auto-confirm and call the method on the target compute + # so we do not have to wait for the periodic to run. Also configure + # the RPC call from the API to the source compute to timeout after 1 + # second. + self.flags(resize_confirm_window=1, rpc_response_timeout=1) + # Stub timeutils so the DB API query finds the unconfirmed migration. + future = timeutils.utcnow() + datetime.timedelta(hours=1) + ctxt = context.get_admin_context() + with osloutils_fixture.TimeFixture(future): + # Use the CastAsCall fixture since the fake rpc is not going to + # simulate a failure from the service being down. 
+ with cast_as_call.CastAsCall(self): + self.computes['host2'].manager._poll_unconfirmed_resizes(ctxt) + self.assertIn('Error auto-confirming resize', + self.stdlog.logger.output) + self.assertIn('No reply on topic compute', self.stdlog.logger.output) + # FIXME(mriedem): This is bug 1855927 where the migration status is + # left in "confirming" so the _poll_unconfirmed_resizes task will not + # process it again nor can the user confirm the resize in the API since + # the migration status is not "finished". + self._wait_for_migration_status(server, ['confirming']) + ex = self.assertRaises(client.OpenStackApiException, + self.api.post_server_action, + server['id'], {'confirmResize': None}) + self.assertEqual(400, ex.response.status_code) + self.assertIn('Instance has not been resized', six.text_type(ex)) + # That error message is bogus because the server is resized. + server = self.api.get_server(server['id']) + self.assertEqual('VERIFY_RESIZE', server['status']) + + class ServerLiveMigrateForceAndAbort( integrated_helpers.ProviderUsageBaseTestCase): """Test Server live migrations, which delete the migration or |