summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAija Jauntēva <aija.jaunteva@dell.com>2020-10-02 11:04:01 -0400
committerAija Jauntēva <aija.jaunteva@dell.com>2021-04-09 08:19:26 +0000
commit6130dc15e05325b7fbfa621334e806b15d5cf818 (patch)
tree4418e7443ab3a707cea2991339283db87265e153
parent4fd099345d662dc82a978e767c0b534de4e44e7d (diff)
downloadironic-6130dc15e05325b7fbfa621334e806b15d5cf818.tar.gz
Fix idrac-wsman BIOS step async error handling
Instead of using process_event('fail') use error_handlers, otherwise in case of failure node gets stuck and fails because of timeout, instead of failing earlier due to step failure. And improve coverage to test this error handling and also happy paths. Story: 2008307 Task: 41197 Change-Id: I1e957c2b526abc37920212b6431b11eedc9f89be (cherry picked from commit 83ce7c4264a57658fb749c740e6b32e3eb6a906b)
-rw-r--r--ironic/drivers/modules/drac/bios.py20
-rw-r--r--ironic/tests/unit/drivers/modules/drac/test_bios.py82
-rw-r--r--releasenotes/notes/fix-wsman-bios-async-step-error-handling-80cd30c54c71c595.yaml8
3 files changed, 101 insertions, 9 deletions
diff --git a/ironic/drivers/modules/drac/bios.py b/ironic/drivers/modules/drac/bios.py
index bc09c0176..db4a497a1 100644
--- a/ironic/drivers/modules/drac/bios.py
+++ b/ironic/drivers/modules/drac/bios.py
@@ -261,14 +261,18 @@ class DracWSManBIOS(base.BIOSInterface):
:param task: a TaskManager instance with node to act on
:param config_job: a python-dracclient Job object (named tuple)
"""
- LOG.error("BIOS configuration job failed for node %(node)s. "
- "Failed config job: %(config_job_id)s. "
- "Message: '%(message)s'.",
- {'node': task.node_uuid, 'config_job_id': config_job.id,
- 'message': config_job.message})
- task.node.last_error = config_job.message
- # tell conductor to handle failure of clean/deploy step
- task.process_event('fail')
+ error_msg = (_("Failed config job: %(config_job_id)s. "
+ "Message: '%(message)s'.") %
+ {'config_job_id': config_job.id,
+ 'message': config_job.message})
+ log_msg = ("BIOS configuration job failed for node %(node)s. "
+ "%(error)s " %
+ {'node': task.node.uuid,
+ 'error': error_msg})
+ if task.node.clean_step:
+ manager_utils.cleaning_error_handler(task, log_msg, error_msg)
+ else:
+ manager_utils.deploying_error_handler(task, log_msg, error_msg)
def _resume_current_operation(self, task):
"""Continue cleaning/deployment of the node.
diff --git a/ironic/tests/unit/drivers/modules/drac/test_bios.py b/ironic/tests/unit/drivers/modules/drac/test_bios.py
index 6fdca0684..3903d979c 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_bios.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_bios.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright (c) 2015-2016 Dell Inc. or its subsidiaries.
+# Copyright (c) 2015-2021 Dell Inc. or its subsidiaries.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
@@ -26,6 +26,7 @@ from dracclient import exceptions as drac_exceptions
from ironic.common import exception
from ironic.common import states
from ironic.conductor import task_manager
+from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.drac import bios as drac_bios
from ironic.drivers.modules.drac import common as drac_common
@@ -238,6 +239,85 @@ class DracWSManBIOSConfigurationTestCase(test_utils.BaseDracTest):
self.assertRaises(exception.DracOperationError,
task.driver.bios.factory_reset, task)
+ @mock.patch.object(manager_utils, 'notify_conductor_resume_clean',
+ autospec=True)
+ @mock.patch.object(drac_job, 'get_job', spec_set=True,
+ autospec=True)
+ def test__check_node_bios_jobs(self, mock_get_job,
+ mock_notify_conductor_resume_clean):
+ mock_job = mock.Mock()
+ mock_job.status = 'Completed'
+ mock_get_job.return_value = mock_job
+
+ with task_manager.acquire(self.context, self.node.uuid) as task:
+ driver_internal_info = task.node.driver_internal_info
+ driver_internal_info['bios_config_job_ids'] = ['123', '789']
+ task.node.driver_internal_info = driver_internal_info
+ task.node.clean_step = {'priority': 100, 'interface': 'bios',
+ 'step': 'factory_reset', 'argsinfo': {}}
+ task.node.save()
+ mock_cache = mock.Mock()
+ task.driver.bios.cache_bios_settings = mock_cache
+
+ task.driver.bios._check_node_bios_jobs(task)
+
+ self.assertEqual([], task.node.driver_internal_info.get(
+ 'bios_config_job_ids'))
+ mock_cache.assert_called_once_with(task)
+ mock_notify_conductor_resume_clean.assert_called_once_with(task)
+
+ @mock.patch.object(drac_job, 'get_job', spec_set=True,
+ autospec=True)
+ def test__check_node_bios_jobs_still_running(self, mock_get_job):
+ mock_job = mock.Mock()
+ mock_job.status = 'Running'
+ mock_get_job.return_value = mock_job
+
+ with task_manager.acquire(self.context, self.node.uuid) as task:
+ driver_internal_info = task.node.driver_internal_info
+ driver_internal_info['bios_config_job_ids'] = ['123']
+ task.node.driver_internal_info = driver_internal_info
+ task.node.save()
+ mock_resume = mock.Mock()
+ task.driver.bios._resume_current_operation = mock_resume
+ mock_cache = mock.Mock()
+ task.driver.bios.cache_bios_settings = mock_cache
+
+ task.driver.bios._check_node_bios_jobs(task)
+
+ self.assertEqual(['123'],
+ task.node.driver_internal_info.get(
+ 'bios_config_job_ids'))
+ mock_cache.assert_not_called()
+ mock_resume.assert_not_called()
+
+ @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
+ @mock.patch.object(drac_job, 'get_job', spec_set=True,
+ autospec=True)
+ def test__check_node_bios_jobs_failed(self, mock_get_job,
+ mock_cleaning_error_handler):
+ mock_job = mock.Mock()
+ mock_job.status = 'Failed'
+ mock_job.id = '123'
+ mock_job.message = 'Invalid'
+ mock_get_job.return_value = mock_job
+
+ with task_manager.acquire(self.context, self.node.uuid) as task:
+ driver_internal_info = task.node.driver_internal_info
+ driver_internal_info['bios_config_job_ids'] = ['123']
+ task.node.driver_internal_info = driver_internal_info
+ task.node.clean_step = {'priority': 100, 'interface': 'bios',
+ 'step': 'factory_reset', 'argsinfo': {}}
+ task.node.save()
+
+ task.driver.bios._check_node_bios_jobs(task)
+
+ self.assertEqual([],
+ task.node.driver_internal_info.get(
+ 'bios_config_job_ids'))
+ mock_cleaning_error_handler.assert_called_once_with(
+ task, mock.ANY, "Failed config job: 123. Message: 'Invalid'.")
+
class DracBIOSConfigurationTestCase(test_utils.BaseDracTest):
diff --git a/releasenotes/notes/fix-wsman-bios-async-step-error-handling-80cd30c54c71c595.yaml b/releasenotes/notes/fix-wsman-bios-async-step-error-handling-80cd30c54c71c595.yaml
new file mode 100644
index 000000000..ca39a1512
--- /dev/null
+++ b/releasenotes/notes/fix-wsman-bios-async-step-error-handling-80cd30c54c71c595.yaml
@@ -0,0 +1,8 @@
+---
+fixes:
+ - |
+ Fixes ``idrac-wsman`` BIOS ``apply_configuration`` and ``factory_reset``
+ clean and deploy steps to fail correctly in case of error when checking
+ completed jobs. Before the fix when BIOS job failed, then node clean or
+ deploy failed with timeout instead of actual error in cleaning or
+ deploying step.