diff options
author | Hironori Shiina <shiina.hironori@jp.fujitsu.com> | 2018-02-08 18:03:09 +0900 |
---|---|---|
committer | Thomas Bechtold <tbechtold@suse.com> | 2018-04-25 09:04:58 +0200 |
commit | a55331cf084f0dd12796581605585d5bdfb22ecb (patch) | |
tree | 7531c682342fb825a37146be1a45b51dc88a88ba | |
parent | 1204f667851b5b71daa07ecf9cc20d984899e404 (diff) | |
download | ironic-a55331cf084f0dd12796581605585d5bdfb22ecb.tar.gz |
Remove too large configdrive for handling error
When configdrive is too large, a node object cannot be saved to DB. If
it happens, the node cannot moved to DEPLOYFAIL because saving the node
is prevented again by the large configdrive in the object. In this
case, the node gets stuck in DEPLOYING, which doesn't allow any state
transition.
This patch removes the configdrive from a node object when storing the
configdrive fails. This also catches ConfigInvalid exception, which is
mentioned in the docsting, and any unexpected exception from
_store_configdrive() to avoid getting a node stuck in DEPLOYING.
(cherry picked from commit 927c487a0f822ab789f38be5f895fe061e5e71cb)
Change-Id: I83cf3e02622fc3ed8f5b5389f533e374c1b985f3
Closes-Bug: 1745630
-rw-r--r-- | ironic/conductor/manager.py | 23 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_manager.py | 63 | ||||
-rw-r--r-- | releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml | 8 |
3 files changed, 89 insertions, 5 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index fae7d56ff..c6221a1e6 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -50,6 +50,7 @@ import eventlet from futurist import periodics from futurist import waiters from ironic_lib import metrics_utils +from oslo_db import exception as db_exception from oslo_log import log import oslo_messaging as messaging from oslo_utils import excutils @@ -2810,6 +2811,7 @@ def _store_configdrive(node, configdrive): i_info = node.instance_info i_info['configdrive'] = configdrive node.instance_info = i_info + node.save() @METRICS.timer('do_node_deploy') @@ -2829,7 +2831,7 @@ def do_node_deploy(task, conductor_id, configdrive=None): try: if configdrive: _store_configdrive(node, configdrive) - except exception.SwiftOperationError as e: + except (exception.SwiftOperationError, exception.ConfigInvalid) as e: with excutils.save_and_reraise_exception(): handle_failure( e, task, @@ -2837,6 +2839,25 @@ def do_node_deploy(task, conductor_id, configdrive=None): '%(node)s to Swift'), _('Failed to upload the configdrive to Swift. ' 'Error: %s')) + except db_exception.DBDataError as e: + with excutils.save_and_reraise_exception(): + # NOTE(hshiina): This error happens when the configdrive is + # too large. Remove the configdrive from the + # object to update DB successfully in handling + # the failure. + node.obj_reset_changes() + handle_failure( + e, task, + ('Error while storing the configdrive for %(node)s into ' + 'the database: %(err)s'), + _("Failed to store the configdrive in the database. : %s")) + except Exception as e: + with excutils.save_and_reraise_exception(): + handle_failure( + e, task, + ('Unexpected error while preparing the configdrive for ' + 'node %(node)s'), + _("Failed to prepare the configdrive. Exception: %s")) try: task.driver.deploy.prepare(task) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index b743d42c6..619451efd 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -24,6 +24,7 @@ import datetime import eventlet import mock from oslo_config import cfg +from oslo_db import exception as db_exception import oslo_messaging as messaging from oslo_utils import uuidutils from oslo_versionedobjects import base as ovo_base @@ -48,7 +49,6 @@ from ironic.drivers.modules.network import flat as n_flat from ironic import objects from ironic.objects import base as obj_base from ironic.objects import fields as obj_fields -from ironic.tests import base as tests_base from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -1540,6 +1540,59 @@ class DoNodeDeployTearDownTestCase(mgr_utils.ServiceSetUpMixin, self.assertFalse(mock_deploy.called) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_db_error(self, mock_deploy): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + task.node.save() + expected_instance_info = dict(node.instance_info) + with mock.patch.object(dbapi.IMPL, 'update_node') as mock_db: + db_node = self.dbapi.get_node_by_uuid(node.uuid) + mock_db.side_effect = [db_exception.DBDataError('DB error'), + db_node, db_node] + self.assertRaises(db_exception.DBDataError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + expected_instance_info.update(configdrive=b'fake config drive') + expected_calls = [ + mock.call(node.uuid, + {'version': mock.ANY, + 'instance_info': expected_instance_info}), + mock.call(node.uuid, + {'version': mock.ANY, + 'provision_state': states.DEPLOYFAIL, + 'target_provision_state': states.ACTIVE}), + mock.call(node.uuid, + {'version': mock.ANY, + 'last_error': mock.ANY})] + self.assertEqual(expected_calls, mock_db.mock_calls) + self.assertFalse(mock_deploy.called) + + @mock.patch.object(manager, '_store_configdrive') + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') + def test__do_node_deploy_configdrive_unexpected_error(self, mock_deploy, + mock_store): + self._start_service() + node = obj_utils.create_test_node(self.context, driver='fake', + provision_state=states.DEPLOYING, + target_provision_state=states.ACTIVE) + task = task_manager.TaskManager(self.context, node.uuid) + + mock_store.side_effect = RuntimeError('unexpected') + self.assertRaises(RuntimeError, + manager.do_node_deploy, task, + self.service.conductor.id, + configdrive=b'fake config drive') + node.refresh() + self.assertEqual(states.DEPLOYFAIL, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + self.assertIsNotNone(node.last_error) + self.assertFalse(mock_deploy.called) + + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') def test__do_node_deploy_ok_2(self, mock_deploy): # NOTE(rloo): a different way of testing for the same thing as in # test__do_node_deploy_ok() @@ -5284,16 +5337,17 @@ class ManagerSyncLocalStateTestCase(mgr_utils.CommonMixIn, db_base.DbTestCase): @mock.patch.object(swift, 'SwiftAPI') -class StoreConfigDriveTestCase(tests_base.TestCase): +class StoreConfigDriveTestCase(db_base.DbTestCase): def setUp(self): super(StoreConfigDriveTestCase, self).setUp() - self.node = obj_utils.get_test_node(self.context, driver='fake', - instance_info=None) + self.node = obj_utils.create_test_node(self.context, driver='fake', + instance_info=None) def test_store_configdrive(self, mock_swift): manager._store_configdrive(self.node, 'foo') expected_instance_info = {'configdrive': 'foo'} + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) self.assertFalse(mock_swift.called) @@ -5321,6 +5375,7 @@ class StoreConfigDriveTestCase(tests_base.TestCase): object_headers=expected_obj_header) mock_swift.return_value.get_temp_url.assert_called_once_with( container_name, expected_obj_name, timeout) + self.node.refresh() self.assertEqual(expected_instance_info, self.node.instance_info) diff --git a/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml new file mode 100644 index 000000000..5fafb4be5 --- /dev/null +++ b/releasenotes/notes/bug-1745630-d28c8de54cebd329.yaml @@ -0,0 +1,8 @@ +--- +fixes: + - | + Fixes a bug to get a node stuck in ``deploying`` state when the size of + the configdrive exceeds the limitation of the database. In MySQL, the + limitation is about 64KiB. With this fix, the provision state gets + ``deploy failed`` in this case. See `bug 1745630 + <https://bugs.launchpad.net/ironic/+bug/1745630>`_ for details. |