From 8b920d9af9acf83eb2aa60a11159fad3b6a0bf3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aija=20Jaunt=C4=93va?=
Date: Tue, 4 Jan 2022 04:42:51 -0500
Subject: Fix redfish RAID failed tasks

Redfish Tasks do not report failures via HTTP status codes. Instead
have to check TaskState and TaskStatus.

Story: 2009767
Task: 44244

Change-Id: I8f9a89d18d22d18a2e695fde894bcd27f18f8954
(cherry picked from commit 196d8f7fc055667af501ea0569ba5c501a2c6e8d)
---
 ironic/drivers/modules/redfish/raid.py              | 73 +++++++++----------
 .../unit/drivers/modules/redfish/test_raid.py       | 84 ++++++++++++++++++++++
 .../unit/drivers/third_party_driver_mock_specs.py   |  1 +
 .../tests/unit/drivers/third_party_driver_mocks.py  |  1 +
 ...redfish-raid-failed-tasks-02487c4698dea176.yaml  |  6 ++
 5 files changed, 126 insertions(+), 39 deletions(-)
 create mode 100644 releasenotes/notes/fix-redfish-raid-failed-tasks-02487c4698dea176.yaml

diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py
index f726d9d57..ce58ca3c7 100644
--- a/ironic/drivers/modules/redfish/raid.py
+++ b/ironic/drivers/modules/redfish/raid.py
@@ -1110,60 +1110,55 @@ class RedfishRAID(base.RAIDInterface):
                   '%(node)s was already locked by another process. '
                   'Skip.', {'node': node_uuid})
 
-    def _get_error_messages(self, response):
-        try:
-            body = response.json()
-        except ValueError:
-            return []
-        else:
-            error = body.get('error', {})
-            code = error.get('code', '')
-            message = error.get('message', code)
-            ext_info = error.get('@Message.ExtendedInfo', [{}])
-            messages = [m.get('Message') for m in ext_info if 'Message' in m]
-            if not messages and message:
-                messages = [message]
-            return messages
-
     def _raid_config_in_progress(self, task, raid_config):
-        """Check if this RAID configuration operation is still in progress."""
+        """Check if this RAID configuration operation is still in progress.
+
+        :param task: TaskManager object containing the node.
+        :param raid_config: RAID configuration operation details.
+        :returns: True, if still in progress, otherwise False.
+        """
         task_monitor_uri = raid_config['task_monitor_uri']
         try:
             task_monitor = redfish_utils.get_task_monitor(task.node,
                                                           task_monitor_uri)
         except exception.RedfishError:
-            LOG.info('Unable to get status of RAID %(operation)s task to node '
-                     '%(node_uuid)s; assuming task completed successfully',
+            LOG.info('Unable to get status of RAID %(operation)s task '
+                     '%(task_mon_uri)s to node %(node_uuid)s; assuming task '
+                     'completed successfully',
                      {'operation': raid_config['operation'],
+                      'task_mon_uri': task_monitor_uri,
                       'node_uuid': task.node.uuid})
             return False
         if task_monitor.is_processing:
-            LOG.debug('RAID %(operation)s task %(task_mon)s to node '
+            LOG.debug('RAID %(operation)s task %(task_mon_uri)s to node '
                       '%(node_uuid)s still in progress',
                       {'operation': raid_config['operation'],
-                       'task_mon': task_monitor.task_monitor_uri,
+                       'task_mon_uri': task_monitor.task_monitor_uri,
                        'node_uuid': task.node.uuid})
             return True
         else:
-            response = task_monitor.response
-            if response is not None:
-                status_code = response.status_code
-                if status_code >= 400:
-                    messages = self._get_error_messages(response)
-                    LOG.error('RAID %(operation)s task to node '
-                              '%(node_uuid)s failed with status '
-                              '%(status_code)s; messages: %(messages)s',
-                              {'operation': raid_config['operation'],
-                               'node_uuid': task.node.uuid,
-                               'status_code': status_code,
-                               'messages': ", ".join(messages)})
-                else:
-                    LOG.info('RAID %(operation)s task to node '
-                             '%(node_uuid)s completed with status '
-                             '%(status_code)s',
-                             {'operation': raid_config['operation'],
-                              'node_uuid': task.node.uuid,
-                              'status_code': status_code})
+            sushy_task = task_monitor.get_task()
+            messages = []
+            if sushy_task.messages and not sushy_task.messages[0].message:
+                sushy_task.parse_messages()
+
+            messages = [m.message for m in sushy_task.messages]
+
+            if (sushy_task.task_state == sushy.TASK_STATE_COMPLETED
+                    and sushy_task.task_status in
+                    [sushy.HEALTH_OK, sushy.HEALTH_WARNING]):
+                LOG.info('RAID %(operation)s task %(task_mon_uri)s to node '
+                         '%(node_uuid)s completed.',
+                         {'operation': raid_config['operation'],
+                          'task_mon_uri': task_monitor.task_monitor_uri,
+                          'node_uuid': task.node.uuid})
+            else:
+                LOG.error('RAID %(operation)s task %(task_mon_uri)s to node '
+                          '%(node_uuid)s failed; messages: %(messages)s',
+                          {'operation': raid_config['operation'],
+                           'task_mon_uri': task_monitor.task_monitor_uri,
+                           'node_uuid': task.node.uuid,
+                           'messages': ", ".join(messages)})
         return False
 
     @METRICS.timer('RedfishRAID._check_node_raid_config')
diff --git a/ironic/tests/unit/drivers/modules/redfish/test_raid.py b/ironic/tests/unit/drivers/modules/redfish/test_raid.py
index 59178570a..74a3adf99 100644
--- a/ironic/tests/unit/drivers/modules/redfish/test_raid.py
+++ b/ironic/tests/unit/drivers/modules/redfish/test_raid.py
@@ -1058,3 +1058,87 @@ class RedfishRAIDTestCase(db_base.DbTestCase):
 
         self.assertEqual(storage, self.mock_storage)
         nonraid_storage.drives.assert_not_called()
+
+    @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
+    @mock.patch.object(redfish_raid.LOG, 'info', autospec=True)
+    def test__raid_config_in_progress_success(
+            self, mock_info, mock_get_task_monitor, mock_get_system):
+        mock_task = mock.Mock()
+        mock_task.task_state = sushy.TASK_STATE_COMPLETED
+        mock_task.task_status = sushy.HEALTH_OK
+        mock_task.messages = []
+        mock_task_monitor = mock.Mock()
+        mock_task_monitor.is_processing = False
+        mock_task_monitor.get_task.return_value = mock_task
+        mock_get_task_monitor.return_value = mock_task_monitor
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+
+            raid = redfish_raid.RedfishRAID()
+            result = raid._raid_config_in_progress(
+                task, {'task_monitor_uri': '/TaskService/123',
+                       'operation': 'create'})
+            self.assertEqual(False, result)
+            mock_info.assert_called_once()
+
+    @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
+    @mock.patch.object(redfish_raid.LOG, 'info', autospec=True)
+    def test__raid_config_in_progress_task_mon_error(
+            self, mock_info, mock_get_task_monitor, mock_get_system):
+        mock_get_task_monitor.side_effect = exception.RedfishError(
+            error='Task not found')
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+
+            raid = redfish_raid.RedfishRAID()
+            result = raid._raid_config_in_progress(
+                task, {'task_monitor_uri': '/TaskService/123',
+                       'operation': 'create'})
+            self.assertEqual(False, result)
+            mock_info.assert_called_once()
+
+    @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
+    @mock.patch.object(redfish_raid.LOG, 'debug', autospec=True)
+    def test__raid_config_in_progress_still_processing(
+            self, mock_debug, mock_get_task_monitor, mock_get_system):
+        mock_task_monitor = mock.Mock()
+        mock_task_monitor.is_processing = True
+        mock_get_task_monitor.return_value = mock_task_monitor
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+
+            raid = redfish_raid.RedfishRAID()
+            result = raid._raid_config_in_progress(
+                task, {'task_monitor_uri': '/TaskService/123',
+                       'operation': 'create'})
+            self.assertEqual(True, result)
+            mock_debug.assert_called_once()
+
+    @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
+    @mock.patch.object(redfish_raid.LOG, 'error', autospec=True)
+    def test__raid_config_in_progress_failed(
+            self, mock_error, mock_get_task_monitor, mock_get_system):
+        mock_message = mock.Mock()
+        mock_message.message = 'RAID configuration failed'
+        mock_message.severity = sushy.SEVERITY_CRITICAL
+        mock_task = mock.Mock()
+        mock_task.task_state = sushy.TASK_STATE_COMPLETED
+        mock_task.task_status = sushy.HEALTH_CRITICAL
+        mock_task.messages = [mock_message]
+        mock_task_monitor = mock.Mock()
+        mock_task_monitor.is_processing = False
+        mock_task_monitor.get_task.return_value = mock_task
+        mock_get_task_monitor.return_value = mock_task_monitor
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+
+            raid = redfish_raid.RedfishRAID()
+            result = raid._raid_config_in_progress(
+                task, {'task_monitor_uri': '/TaskService/123',
+                       'operation': 'create'})
+            self.assertEqual(False, result)
+            mock_error.assert_called_once()
diff --git a/ironic/tests/unit/drivers/third_party_driver_mock_specs.py b/ironic/tests/unit/drivers/third_party_driver_mock_specs.py
index d9e9b57d7..54c15ffce 100644
--- a/ironic/tests/unit/drivers/third_party_driver_mock_specs.py
+++ b/ironic/tests/unit/drivers/third_party_driver_mock_specs.py
@@ -175,6 +175,7 @@ SUSHY_SPEC = (
     'TASK_STATE_COMPLETED',
     'HEALTH_OK',
     'HEALTH_WARNING',
+    'HEALTH_CRITICAL',
     'SECURE_BOOT_RESET_KEYS_TO_DEFAULT',
     'SECURE_BOOT_RESET_KEYS_DELETE_ALL',
     'VOLUME_TYPE_RAW_DEVICE',
diff --git a/ironic/tests/unit/drivers/third_party_driver_mocks.py b/ironic/tests/unit/drivers/third_party_driver_mocks.py
index e7cf9a8b7..aca9a1376 100644
--- a/ironic/tests/unit/drivers/third_party_driver_mocks.py
+++ b/ironic/tests/unit/drivers/third_party_driver_mocks.py
@@ -235,6 +235,7 @@ if not sushy:
         TASK_STATE_COMPLETED='completed',
         HEALTH_OK='ok',
         HEALTH_WARNING='warning',
+        HEALTH_CRITICAL='critical',
         SECURE_BOOT_RESET_KEYS_TO_DEFAULT="ResetAllKeysToDefault",
         SECURE_BOOT_RESET_KEYS_DELETE_ALL="DeleteAllKeys",
         VOLUME_TYPE_RAW_DEVICE='rawdevice',
diff --git a/releasenotes/notes/fix-redfish-raid-failed-tasks-02487c4698dea176.yaml b/releasenotes/notes/fix-redfish-raid-failed-tasks-02487c4698dea176.yaml
new file mode 100644
index 000000000..9e827fc9d
--- /dev/null
+++ b/releasenotes/notes/fix-redfish-raid-failed-tasks-02487c4698dea176.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Fixes the determination of a failed RAID configuration task in the
+    ``redfish`` hardware type. Prior to this fix the tasks that have failed
+    were reported as successful.
-- 
cgit v1.2.1
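
The core of the change above is that success or failure of a finished Redfish task is now
derived from the task resource's TaskState and TaskStatus fields (exposed by sushy) rather
than from the task monitor's HTTP status code. A minimal standalone sketch of that check is
shown below; it assumes a sushy ``Task`` object obtained from ``TaskMonitor.get_task()``,
and the helper name ``raid_task_failed`` is illustrative only, not part of the patch:

    import sushy

    def raid_task_failed(sushy_task):
        # Illustrative helper (not in the patch): report whether a finished
        # Redfish task failed, together with any messages it carried.
        # Some BMCs only return message references; resolve them to text first.
        if sushy_task.messages and not sushy_task.messages[0].message:
            sushy_task.parse_messages()
        messages = [m.message for m in sushy_task.messages]

        # Mirrors the patch: only Completed + OK/Warning counts as success.
        succeeded = (sushy_task.task_state == sushy.TASK_STATE_COMPLETED
                     and sushy_task.task_status in (sushy.HEALTH_OK,
                                                    sushy.HEALTH_WARNING))
        return not succeeded, messages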