diff options
author | Zhou Hao <zhouhao@fujitsu.com> | 2021-09-15 09:01:25 +0800 |
---|---|---|
committer | Iury Gregory Melo Ferreira <iurygregory@gmail.com> | 2021-10-05 06:53:07 +0000 |
commit | 5155ade356eb88c9d8b74b33f88835914e2346f5 (patch) | |
tree | 30dda6b7921a6e062ce0e2efe96a5ac6db40faf6 | |
parent | 7db97242eef774a6641fd65496257907ee3295af (diff) | |
download | ironic-5155ade356eb88c9d8b74b33f88835914e2346f5.tar.gz |
[iRMC] Avoid repeatedly resuming clean after creating raid configuration
Fixed a bug where cleaning was resumed repeatedly because the value of `fgi_status` was not updated correctly when obtaining the
RAID configuration status of a node managed by the `irmc` hardware type.
```
Unexpected error when processing next clean step. TypeError: 'NoneType' object is not subscriptable
```
This `NoneType` error occurs because ironic resumes cleaning without waiting for IPA to get the clean steps, and then tries to read the
clean steps, which should be a list but are actually `None`.
During automated cleaning, resuming cleaning should be triggered by IPA, but in this case it is triggered by an iRMC periodic task that checks
the progress of the running RAID configuration.
This error does not occur every time; sometimes the RAID is configured, automated cleaning completes without error, and the node is deployed successfully.
Story: #2009206
Task: #43265
Signed-off-by: Zhou Hao <zhouhao@fujitsu.com>
Change-Id: I5a1c5708bdc1709e928f0faf7e18396e260dc551
(cherry picked from commit 8a5c672fa96521bd7d340c64b0c6ae2785643951)
3 files changed, 46 insertions, 1 deletions
diff --git a/ironic/drivers/modules/irmc/raid.py b/ironic/drivers/modules/irmc/raid.py index 34d1c3f38..25a856e8c 100644 --- a/ironic/drivers/modules/irmc/raid.py +++ b/ironic/drivers/modules/irmc/raid.py @@ -483,6 +483,7 @@ class IRMCRAID(base.RAIDInterface): if all(fgi_status == 'Idle' for fgi_status in fgi_status_dict.values()): raid_config.update({'fgi_status': RAID_COMPLETED}) + raid_common.update_raid_info(node, raid_config) LOG.info('RAID configuration has completed on ' 'node %(node)s with fgi_status is %(fgi)s', {'node': node_uuid, 'fgi': RAID_COMPLETED}) @@ -506,5 +507,4 @@ class IRMCRAID(base.RAIDInterface): task.process_event('fail') def _resume_cleaning(self, task): - raid_common.update_raid_info(task.node, task.node.raid_config) manager_utils.notify_conductor_resume_clean(task) diff --git a/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py b/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py index 6bda0fee6..865f58962 100644 --- a/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py +++ b/ironic/tests/unit/drivers/modules/irmc/test_periodic_task.py @@ -308,3 +308,41 @@ class iRMCPeriodicTaskTestCase(test_common.BaseIRMCTest): clean_fail_mock.assert_called_once_with(mock.ANY, task, fgi_status_dict) clean_mock.assert_called_once_with(mock.ANY, task) + + @mock.patch('ironic.drivers.modules.irmc.raid.IRMCRAID._resume_cleaning', + autospec=True) + @mock.patch('ironic.drivers.modules.irmc.raid.IRMCRAID._set_clean_failed', + autospec=True) + @mock.patch('ironic.drivers.modules.irmc.raid._get_fgi_status', + autospec=True) + @mock.patch.object(irmc_common, 'get_irmc_report', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_raid_config_fgi_status_avoid_repeatedly_resume_cleaning( + self, mock_acquire, report_mock, fgi_mock, clean_fail_mock, + clean_mock): + mock_manager = mock.Mock() + raid_config = self.raid_config + fgi_mock.return_value = {'0': 'Idle', '1': 'Idle'} + task = 
mock.Mock(node=self.node, driver=self.driver) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + task.node.raid_config = raid_config + node_list = [(self.node.uuid, 'irmc', '', raid_config)] + mock_manager.iter_nodes.return_value = node_list + # Set provision state value + task.node.provision_state = 'clean wait' + task.node.save() + task.driver.raid._query_raid_config_fgi_status(mock_manager, + self.context) + raid_config = task.node.raid_config + node_list = [(self.node.uuid, 'irmc', '', raid_config)] + mock_manager.iter_nodes.return_value = node_list + task.node.provision_state = 'clean wait' + task.node.save() + task.driver.raid._query_raid_config_fgi_status(mock_manager, + self.context) + self.assertEqual(0, clean_fail_mock.call_count) + report_mock.assert_called_once_with(task.node) + fgi_mock.assert_called_once_with(report_mock.return_value, + self.node.uuid) + clean_mock.assert_called_once_with(mock.ANY, task) diff --git a/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml b/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml new file mode 100644 index 000000000..02104cc18 --- /dev/null +++ b/releasenotes/notes/irmc-fix-repeatedly-resuming-clean-020f0dfc2e30d7bc.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fixed the bug of repeated resume cleaning due to the value of + `fgi_status` not being updated correctly when obtaining the + RAID configuration status of the node managed by the `irmc` + hardware type. |