summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- devstack/lib/ironic 17
-rw-r--r-- ironic/conductor/manager.py 5
-rw-r--r-- ironic/drivers/modules/drac/power.py 80
-rw-r--r-- ironic/drivers/modules/drac/raid.py 2
-rw-r--r-- ironic/tests/unit/drivers/modules/drac/test_power.py 55
-rw-r--r-- ironic/tests/unit/drivers/third_party_driver_mocks.py 11
-rw-r--r-- releasenotes/notes/bug-2004265-cd9056868295f374.yaml 7
-rw-r--r-- releasenotes/notes/deploy-step-error-d343e8cb7d1b2305.yaml 6
-rw-r--r-- releasenotes/notes/idrac-fix-reboot-failure-c740e765ff41bcf0.yaml 7
-rw-r--r-- zuul.d/ironic-jobs.yaml 1
10 files changed, 163 insertions, 28 deletions
diff --git a/devstack/lib/ironic b/devstack/lib/ironic
index 0723dddce..cff7bbc5b 100644
--- a/devstack/lib/ironic
+++ b/devstack/lib/ironic
@@ -895,12 +895,17 @@ function install_ironic {
# one. The EFI ROM should work on with both boot modes, Legacy
# BIOS and UEFI.
if is_ubuntu; then
- # FIXME(lucasagomes): Enable the multiverse repository by
- # default in the image running the gate tests. Also move the
- # installation of the ovmf package to files/debs/ironic
- sudo add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) multiverse"
- sudo apt-get update
- install_package ovmf
+ # NOTE(rpittau): In bionic, the UEFI firmware shipped in the ovmf
+ # 0~20180205.c0d9813c-2 package (EFI v2.70 by EDK II) is broken.
+ # As a workaround, download and install the older, working version
+ # (EFI v2.60 by EDK II) from the multiverse repository.
+ # Bug reference:
+ # https://bugs.launchpad.net/ubuntu/+source/edk2/+bug/1821729
+ local temp_deb
+ temp_deb="$(mktemp)"
+ wget http://archive.ubuntu.com/ubuntu/pool/multiverse/e/edk2/ovmf_0~20160408.ffea0a2c-2_all.deb -O "$temp_deb"
+ sudo dpkg -i "$temp_deb"
+ rm -f "$temp_deb"
sudo rm /usr/share/qemu/pxe-virtio.rom
sudo ln -s /usr/lib/ipxe/qemu/efi-virtio.rom /usr/share/qemu/pxe-virtio.rom
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 466e96c53..677d2d0c2 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -3862,7 +3862,10 @@ def _do_next_deploy_step(task, step_index, conductor_id):
{'node': node.uuid, 'step': node.deploy_step, 'err': e})
utils.deploying_error_handler(
task, log_msg,
- _("Failed to deploy: %s") % node.deploy_step)
+ _("Failed to deploy: Deploy step %(step)s, "
+ "error: %(err)s.") % {
+ 'step': node.deploy_step,
+ 'err': e})
return
except Exception as e:
log_msg = ('Node %(node)s failed deploy step %(step)s with '
diff --git a/ironic/drivers/modules/drac/power.py b/ironic/drivers/modules/drac/power.py
index fe02fe515..2be801e76 100644
--- a/ironic/drivers/modules/drac/power.py
+++ b/ironic/drivers/modules/drac/power.py
@@ -15,6 +15,8 @@
DRAC power interface
"""
+import time
+
from ironic_lib import metrics_utils
from oslo_log import log as logging
from oslo_utils import importutils
@@ -42,6 +44,10 @@ if drac_constants:
REVERSE_POWER_STATES = dict((v, k) for (k, v) in POWER_STATES.items())
+POWER_STATE_TRIES = 15
+POWER_STATE_SLEEP = 2
+POWER_STATE_CHANGE_FAIL = 'The command failed to set RequestedState'
+
def _get_power_state(node):
"""Returns the current power state of the node.
@@ -100,18 +106,60 @@ def _set_power_state(node, power_state):
_commit_boot_list_change(node)
client = drac_common.get_drac_client(node)
- target_power_state = REVERSE_POWER_STATES[power_state]
-
- try:
- client.set_power_state(target_power_state)
- except drac_exceptions.BaseClientException as exc:
- LOG.error('DRAC driver failed to set power state for node '
- '%(node_uuid)s to %(power_state)s. '
- 'Reason: %(error)s.',
- {'node_uuid': node.uuid,
- 'power_state': power_state,
- 'error': exc})
- raise exception.DracOperationError(error=exc)
+ tries = POWER_STATE_TRIES
+
+ # Cases have been seen where the iDRAC returns a SYS021 error even when
+ # the server is in the right power state and a valid power state change
+ # is attempted. Retry in this case.
+ while tries > 0:
+ # The iDRAC will return a SYS021 error if the server is powered off
+ # and a reboot is requested. In this situation, convert the requested
+ # reboot into a power on to avoid this error. To minimize the chance
+ # of a race condition, it is critical to do this check immediately
+ # before sending the power state change command. This keeps the
+ # window during which the server could change power states without us
+ # knowing about it as small as possible.
+ calc_power_state = power_state
+ if power_state == states.REBOOT:
+ current_power_state = _get_power_state(node)
+ # If the server is not on, then power it on instead of rebooting
+ if current_power_state != states.POWER_ON:
+ calc_power_state = states.POWER_ON
+
+ target_power_state = REVERSE_POWER_STATES[calc_power_state]
+
+ try:
+ client.set_power_state(target_power_state)
+ break
+ except drac_exceptions.BaseClientException as exc:
+ if (power_state == states.REBOOT
+ and POWER_STATE_CHANGE_FAIL in str(exc)
+ and tries > 0):
+ LOG.warning('DRAC driver failed to set power state for node '
+ '%(node_uuid)s to %(calc_power_state)s. '
+ 'Reason: %(error)s. Retrying...',
+ {'node_uuid': node.uuid,
+ 'calc_power_state': calc_power_state,
+ 'error': exc})
+ tries -= 1
+ time.sleep(POWER_STATE_SLEEP)
+ else:
+ LOG.error('DRAC driver failed to set power state for node '
+ '%(node_uuid)s to %(calc_power_state)s. '
+ 'Reason: %(error)s.',
+ {'node_uuid': node.uuid,
+ 'calc_power_state': calc_power_state,
+ 'error': exc})
+ raise exception.DracOperationError(error=exc)
+
+ if tries <= 0:
+ error_msg = (_('DRAC driver timed out while trying to set the power '
+ 'state for node %(node_uuid)s to '
+ '%(calc_power_state)s.') %
+ {'node_uuid': node.uuid,
+ 'calc_power_state': calc_power_state})
+ LOG.error(error_msg)
+ raise exception.DracOperationError(error_msg)
class DracPower(base.PowerInterface):
@@ -187,10 +235,4 @@ class DracPower(base.PowerInterface):
"timeout=%(timeout)s",
{'timeout': timeout})
- current_power_state = _get_power_state(task.node)
- if current_power_state == states.POWER_ON:
- target_power_state = states.REBOOT
- else:
- target_power_state = states.POWER_ON
-
- _set_power_state(task.node, target_power_state)
+ _set_power_state(task.node, states.REBOOT)
diff --git a/ironic/drivers/modules/drac/raid.py b/ironic/drivers/modules/drac/raid.py
index 633216d1c..56f8cbc90 100644
--- a/ironic/drivers/modules/drac/raid.py
+++ b/ironic/drivers/modules/drac/raid.py
@@ -495,7 +495,7 @@ def _calculate_volume_props(logical_disk, physical_disks, free_space_mb):
error_msg = _('invalid number of physical disks was provided')
raise exception.DracOperationError(error=error_msg)
- disks_per_span = len(selected_disks) / spans_count
+ disks_per_span = int(len(selected_disks) / spans_count)
# Best practice is to not pass span_length and span_depth when creating a
# RAID10. The iDRAC will dynamically calculate these values using maximum
diff --git a/ironic/tests/unit/drivers/modules/drac/test_power.py b/ironic/tests/unit/drivers/modules/drac/test_power.py
index 27cc75981..4c442ba6f 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_power.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_power.py
@@ -144,3 +144,58 @@ class DracPowerTestCase(test_utils.BaseDracTest):
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
mock_client.set_power_state.assert_called_once_with(drac_power_state)
+
+ @mock.patch('time.sleep')
+ def test_reboot_retries_success(self, mock_sleep, mock_get_drac_client):
+ mock_client = mock_get_drac_client.return_value
+ mock_client.get_power_state.return_value = drac_constants.POWER_OFF
+ exc = drac_exceptions.DRACOperationFailed(
+ drac_messages=['The command failed to set RequestedState'])
+ mock_client.set_power_state.side_effect = [exc, None]
+
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.driver.power.reboot(task)
+
+ drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
+ self.assertEqual(2, mock_client.set_power_state.call_count)
+ mock_client.set_power_state.assert_has_calls(
+ [mock.call(drac_power_state),
+ mock.call(drac_power_state)])
+
+ @mock.patch('time.sleep')
+ def test_reboot_retries_fail(self, mock_sleep, mock_get_drac_client):
+ mock_client = mock_get_drac_client.return_value
+ mock_client.get_power_state.return_value = drac_constants.POWER_OFF
+ exc = drac_exceptions.DRACOperationFailed(
+ drac_messages=['The command failed to set RequestedState'])
+ mock_client.set_power_state.side_effect = exc
+
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ self.assertRaises(exception.DracOperationError,
+ task.driver.power.reboot, task)
+
+ self.assertEqual(drac_power.POWER_STATE_TRIES,
+ mock_client.set_power_state.call_count)
+
+ @mock.patch('time.sleep')
+ def test_reboot_retries_power_change_success(self, mock_sleep,
+ mock_get_drac_client):
+ mock_client = mock_get_drac_client.return_value
+ mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
+ drac_constants.POWER_ON]
+ exc = drac_exceptions.DRACOperationFailed(
+ drac_messages=['The command failed to set RequestedState'])
+ mock_client.set_power_state.side_effect = [exc, None]
+
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.driver.power.reboot(task)
+
+ self.assertEqual(2, mock_client.set_power_state.call_count)
+ drac_power_state1 = drac_power.REVERSE_POWER_STATES[states.POWER_ON]
+ drac_power_state2 = drac_power.REVERSE_POWER_STATES[states.REBOOT]
+ mock_client.set_power_state.assert_has_calls(
+ [mock.call(drac_power_state1),
+ mock.call(drac_power_state2)])
diff --git a/ironic/tests/unit/drivers/third_party_driver_mocks.py b/ironic/tests/unit/drivers/third_party_driver_mocks.py
index d953ca475..281c1ab64 100644
--- a/ironic/tests/unit/drivers/third_party_driver_mocks.py
+++ b/ironic/tests/unit/drivers/third_party_driver_mocks.py
@@ -96,6 +96,17 @@ if not dracclient:
sys.modules['dracclient.exceptions'] = dracclient.exceptions
dracclient.exceptions.BaseClientException = type('BaseClientException',
(Exception,), {})
+
+ dracclient.exceptions.DRACRequestFailed = type(
+ 'DRACRequestFailed', (dracclient.exceptions.BaseClientException,), {})
+
+ class DRACOperationFailed(dracclient.exceptions.DRACRequestFailed):
+ def __init__(self, **kwargs):
+ super(DRACOperationFailed, self).__init__(
+ 'DRAC operation failed. Messages: %(drac_messages)s' % kwargs)
+
+ dracclient.exceptions.DRACOperationFailed = DRACOperationFailed
+
# Now that the external library has been mocked, if anything had already
# loaded any of the drivers, reload them.
if 'ironic.drivers.modules.drac' in sys.modules:
diff --git a/releasenotes/notes/bug-2004265-cd9056868295f374.yaml b/releasenotes/notes/bug-2004265-cd9056868295f374.yaml
new file mode 100644
index 000000000..5d25b5ea8
--- /dev/null
+++ b/releasenotes/notes/bug-2004265-cd9056868295f374.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+ - |
+ Fixes the ``Invalid parameter value for SpanLength`` error when
+ configuring RAID using Python 3. A value of the wrong data type was
+ passed to the iDRAC, e.g., ``2.0`` instead of ``2``.
+ See `story 2004265 <https://storyboard.openstack.org/#!/story/2004265>`_.
diff --git a/releasenotes/notes/deploy-step-error-d343e8cb7d1b2305.yaml b/releasenotes/notes/deploy-step-error-d343e8cb7d1b2305.yaml
new file mode 100644
index 000000000..9af1e9782
--- /dev/null
+++ b/releasenotes/notes/deploy-step-error-d343e8cb7d1b2305.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+ - |
+ Fixes vague node ``last_error`` field reporting upon deploy step
+ failure by providing the exception error message in addition
+ to the step that failed.
diff --git a/releasenotes/notes/idrac-fix-reboot-failure-c740e765ff41bcf0.yaml b/releasenotes/notes/idrac-fix-reboot-failure-c740e765ff41bcf0.yaml
new file mode 100644
index 000000000..d5b922c70
--- /dev/null
+++ b/releasenotes/notes/idrac-fix-reboot-failure-c740e765ff41bcf0.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+ - |
+ Fixes a bug where rebooting a node managed by the ``idrac`` hardware
+ type with the WS-MAN power interface sometimes failed with a
+ ``The command failed to set RequestedState`` error. See `story 2007487
+ <https://storyboard.openstack.org/#!/story/2007487>`_ for details.
diff --git a/zuul.d/ironic-jobs.yaml b/zuul.d/ironic-jobs.yaml
index 738edb4ce..e3b17432d 100644
--- a/zuul.d/ironic-jobs.yaml
+++ b/zuul.d/ironic-jobs.yaml
@@ -165,7 +165,6 @@
name: ironic-tempest-ipa-partition-uefi-pxe_ipmitool-tinyipa
description: ironic-tempest-ipa-partition-uefi-pxe_ipmitool-tinyipa
parent: ironic-base
- nodeset: openstack-single-node-xenial
timeout: 5400
vars:
devstack_localrc: