diff options
author | Zuul <zuul@review.openstack.org> | 2018-08-07 15:36:43 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2018-08-07 15:36:43 +0000 |
commit | 1364df4361e0746aef89ea1224f45d61f8cb9990 (patch) | |
tree | b1d457c075fb59828c5732a73ce70ee9b410cfe3 /ironic | |
parent | a62a029f670b783ac82ec5b7e2b340666644514b (diff) | |
parent | d70db292aeadc99434c002a042da0993150c32fc (diff) | |
download | ironic-1364df4361e0746aef89ea1224f45d61f8cb9990.tar.gz |
Merge "Node gets stuck in ING state when conductor goes down"
Diffstat (limited to 'ironic')
-rw-r--r-- | ironic/common/states.py | 9 | ||||
-rw-r--r-- | ironic/conductor/base_manager.py | 14 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_base_manager.py | 23 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_manager.py | 22 |
4 files changed, 49 insertions, 19 deletions
diff --git a/ironic/common/states.py b/ironic/common/states.py index cef2459f5..6a4d5013d 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -234,6 +234,15 @@ UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING, RESCUEWAIT, UNRESCUING) """States that can be changed without external request.""" +STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING, + ADOPTING, RESCUING, UNRESCUING) +"""States that cannot be resumed once a conductor dies. + +If a node gets stuck with one of these states for some reason +(eg. conductor goes down when executing task), node will be moved +to fail state. +""" + ############## # Power states ############## diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py index 1be8274b3..5156cbc94 100644 --- a/ironic/conductor/base_manager.py +++ b/ironic/conductor/base_manager.py @@ -179,14 +179,12 @@ class BaseConductorManager(object): self._periodic_tasks_worker.add_done_callback( self._on_periodic_tasks_stop) - self._fail_transient_state( - states.DEPLOYING, - _("The deployment can't be resumed by conductor " - "%s. Moving to fail state.") % self.host) - self._fail_transient_state( - states.CLEANING, - _("The cleaning can't be resumed by conductor " - "%s. Moving to fail state.") % self.host) + for state in states.STUCK_STATES_TREATED_AS_FAIL: + self._fail_transient_state( + state, + _("The %(state)s state can't be resumed by conductor " + "%(host)s. Moving to fail state.") % + {'state': state, 'host': self.host}) # Start consoles if it set enabled in a greenthread. try: diff --git a/ironic/tests/unit/conductor/test_base_manager.py b/ironic/tests/unit/conductor/test_base_manager.py index 5de426afc..0c619d912 100644 --- a/ironic/tests/unit/conductor/test_base_manager.py +++ b/ironic/tests/unit/conductor/test_base_manager.py @@ -13,6 +13,7 @@ """Test class for Ironic BaseConductorManager.""" import collections +import uuid import eventlet import futurist @@ -24,6 +25,7 @@ from oslo_utils import uuidutils from ironic.common import driver_factory from ironic.common import exception +from ironic.common import states from ironic.conductor import base_manager from ironic.conductor import manager from ironic.conductor import notification_utils @@ -206,6 +208,27 @@ class StartStopTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertRaisesRegex(RuntimeError, 'already running', self.service.init_host) + def test_start_recover_nodes_stuck(self): + state_trans = [ + (states.DEPLOYING, states.DEPLOYFAIL), + (states.CLEANING, states.CLEANFAIL), + (states.VERIFYING, states.ENROLL), + (states.INSPECTING, states.INSPECTFAIL), + (states.ADOPTING, states.ADOPTFAIL), + (states.RESCUING, states.RESCUEFAIL), + (states.UNRESCUING, states.UNRESCUEFAIL), + ] + nodes = [obj_utils.create_test_node(self.context, uuid=uuid.uuid4(), + driver='fake-hardware', + provision_state=state[0]) + for state in state_trans] + + self._start_service() + for node, state in zip(nodes, state_trans): + node.refresh() + self.assertEqual(state[1], node.provision_state, + 'Test failed when recovering from %s' % state[0]) + @mock.patch.object(base_manager, 'LOG') def test_warning_on_low_workers_pool(self, log_mock): CONF.set_override('workers_pool_size', 3, 'conductor') diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 80bd7a466..03c9982bd 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -4035,11 +4035,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch('ironic.drivers.modules.fake.FakeRescue.rescue') def test__do_node_rescue_returns_rescuewait(self, mock_rescue): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.RESCUING, instance_info={'rescue_password': 'password'}) - self._start_service() with task_manager.TaskManager(self.context, node.uuid) as task: mock_rescue.return_value = states.RESCUEWAIT self.service._do_node_rescue(task) @@ -4050,11 +4050,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch('ironic.drivers.modules.fake.FakeRescue.rescue') def test__do_node_rescue_returns_rescue(self, mock_rescue): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.RESCUING, instance_info={'rescue_password': 'password'}) - self._start_service() with task_manager.TaskManager(self.context, node.uuid) as task: mock_rescue.return_value = states.RESCUE self.service._do_node_rescue(task) @@ -4066,11 +4066,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch.object(manager, 'LOG') @mock.patch('ironic.drivers.modules.fake.FakeRescue.rescue') def test__do_node_rescue_errors(self, mock_rescue, mock_log): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.RESCUING, instance_info={'rescue_password': 'password'}) - self._start_service() mock_rescue.side_effect = exception.InstanceRescueFailure( 'failed to rescue') with task_manager.TaskManager(self.context, node.uuid) as task: @@ -4086,11 +4086,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch.object(manager, 'LOG') @mock.patch('ironic.drivers.modules.fake.FakeRescue.rescue') def test__do_node_rescue_bad_state(self, mock_rescue, mock_log): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.RESCUING, instance_info={'rescue_password': 'password'}) - self._start_service() mock_rescue.return_value = states.ACTIVE with task_manager.TaskManager(self.context, node.uuid) as task: self.service._do_node_rescue(task) @@ -4156,11 +4156,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch('ironic.drivers.modules.fake.FakeRescue.unrescue') def test__do_node_unrescue(self, mock_unrescue): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.UNRESCUING, target_provision_state=states.ACTIVE, instance_info={}) - self._start_service() with task_manager.TaskManager(self.context, node.uuid) as task: mock_unrescue.return_value = states.ACTIVE self.service._do_node_unrescue(task) @@ -4171,11 +4171,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch.object(manager, 'LOG') @mock.patch('ironic.drivers.modules.fake.FakeRescue.unrescue') def test__do_node_unrescue_ironic_error(self, mock_unrescue, mock_log): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.UNRESCUING, target_provision_state=states.ACTIVE, instance_info={}) - self._start_service() mock_unrescue.side_effect = exception.InstanceUnrescueFailure( 'Unable to unrescue') with task_manager.TaskManager(self.context, node.uuid) as task: @@ -4190,11 +4190,11 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch.object(manager, 'LOG') @mock.patch('ironic.drivers.modules.fake.FakeRescue.unrescue') def test__do_node_unrescue_other_error(self, mock_unrescue, mock_log): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.UNRESCUING, target_provision_state=states.ACTIVE, instance_info={}) - self._start_service() mock_unrescue.side_effect = RuntimeError('Some failure') with task_manager.TaskManager(self.context, node.uuid) as task: self.assertRaises(RuntimeError, @@ -4207,10 +4207,10 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, @mock.patch('ironic.drivers.modules.fake.FakeRescue.unrescue') def test__do_node_unrescue_bad_state(self, mock_unrescue): + self._start_service() node = obj_utils.create_test_node(self.context, driver='fake-hardware', provision_state=states.UNRESCUING, instance_info={}) - self._start_service() mock_unrescue.return_value = states.RESCUEWAIT with task_manager.TaskManager(self.context, node.uuid) as task: self.service._do_node_unrescue(task) @@ -4272,6 +4272,7 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakePower.validate') def test__do_node_verify(self, mock_validate, mock_get_power_state, mock_notif): + self._start_service() mock_get_power_state.return_value = states.POWER_OFF # Required for exception handling mock_notif.__name__ = 'NodeCorrectedPowerStateNotification' @@ -4282,7 +4283,6 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): last_error=None, power_state=states.NOSTATE) - self._start_service() with task_manager.acquire( self.context, node['id'], shared=False) as task: self.service._do_node_verify(task) @@ -4311,6 +4311,7 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakePower.validate') def test__do_node_verify_validation_fails(self, mock_validate, mock_get_power_state): + self._start_service() node = obj_utils.create_test_node( self.context, driver='fake-hardware', provision_state=states.VERIFYING, @@ -4320,7 +4321,6 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_validate.side_effect = RuntimeError("boom") - self._start_service() with task_manager.acquire( self.context, node['id'], shared=False) as task: self.service._do_node_verify(task) @@ -4339,6 +4339,7 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakePower.validate') def test__do_node_verify_get_state_fails(self, mock_validate, mock_get_power_state): + self._start_service() node = obj_utils.create_test_node( self.context, driver='fake-hardware', provision_state=states.VERIFYING, @@ -4348,7 +4349,6 @@ class DoNodeVerifyTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_get_power_state.side_effect = RuntimeError("boom") - self._start_service() with task_manager.acquire( self.context, node['id'], shared=False) as task: self.service._do_node_verify(task) |