summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAija Jauntēva <aija.jaunteva@dell.com>2021-09-08 11:20:20 -0400
committerJulia Kreger <juliaashleykreger@gmail.com>2021-09-14 21:16:52 +0000
commit0df43f7586f93ab54d533201de21ed7ed95ec182 (patch)
tree93e22cd8584a46ae8a56ce86bf7d879026047406
parent87dee0250c43e059db727e76b407a0f67c719f43 (diff)
downloadironic-0df43f7586f93ab54d533201de21ed7ed95ec182.tar.gz
Fix idrac-wsman set_power_state to wait on HW
set_power_state has returned to the caller immediately without confirming the system has reached the requested state. This fixes that by synchronously waiting until the target state has been read before returning. That bug can cause instance workload deployments to fail on Dell EMC PowerEdge server models on which IPA ramdisk soft power off fails and ironic employs its OOB fallback strategy. After an otherwise successful deployment, the node is active, but is powered off. No error is reported in last_error. If the subsequent instance workflow expects the system to be powered on into the operating system, it fails. Story: 2009204 Task: 43261 Change-Id: I3112a22149c07e5508f26c79f33d09aeb905c308 (cherry picked from commit 2a0fd1d13f178c9872330cf6498dc25572a721da)
-rw-r--r--ironic/drivers/modules/drac/power.py45
-rw-r--r--ironic/tests/unit/drivers/modules/drac/test_power.py14
-rw-r--r--releasenotes/notes/idrac-wsman-set-power-state-wait-cd8f9ff41b19c7a7.yaml10
3 files changed, 44 insertions, 25 deletions
diff --git a/ironic/drivers/modules/drac/power.py b/ironic/drivers/modules/drac/power.py
index 96f6f9932..468cc64b6 100644
--- a/ironic/drivers/modules/drac/power.py
+++ b/ironic/drivers/modules/drac/power.py
@@ -24,6 +24,7 @@ from oslo_utils import importutils
from ironic.common import exception
from ironic.common import states
from ironic.conductor import task_manager
+from ironic.conductor import utils as cond_utils
from ironic.drivers import base
from ironic.drivers.modules.drac import common as drac_common
from ironic.drivers.modules.drac import management as drac_management
@@ -87,15 +88,17 @@ def _commit_boot_list_change(node):
node.save()
-def _set_power_state(node, power_state):
+def _set_power_state(task, power_state, timeout=None):
"""Turns the server power on/off or do a reboot.
- :param node: an ironic node object.
+ :param task: a TaskManager instance containing the node to act on.
:param power_state: a power state from :mod:`ironic.common.states`.
+ :param timeout: Time to wait for the node to reach the requested state.
+ When requested state is reboot, not used as not waiting then.
:raises: InvalidParameterValue if required DRAC credentials are missing.
:raises: DracOperationError on an error from python-dracclient
"""
-
+ node = task.node
# NOTE(ifarkas): DRAC interface doesn't allow changing the boot device
# multiple times in a row without a reboot. This is
# because a change need to be committed via a
@@ -131,6 +134,20 @@ def _set_power_state(node, power_state):
try:
client.set_power_state(target_power_state)
+ if calc_power_state == states.REBOOT:
+ # TODO(rloo): Support timeouts!
+ if timeout is not None:
+ LOG.warning("The 'idrac-wsman' Power Interface does not "
+ "support 'timeout' parameter when setting "
+ "power state to reboot. Ignoring "
+ "timeout=%(timeout)s",
+ {'timeout': timeout})
+ else:
+ # Skipped for reboot as can't match reboot with on/off.
+ # Reboot so far has been part of workflow that is not followed
+ # by another power state change that could break the flow.
+ cond_utils.node_wait_for_power_state(
+ task, calc_power_state, timeout)
break
except drac_exceptions.BaseClientException as exc:
if (power_state == states.REBOOT
@@ -214,20 +231,13 @@ class DracWSManPower(base.PowerInterface):
:param task: a TaskManager instance containing the node to act on.
:param power_state: a power state from :mod:`ironic.common.states`.
- :param timeout: timeout (in seconds). Unsupported by this interface.
+ :param timeout: Time to wait for the node to reach the requested state.
+ When requested state is reboot, not used as not waiting then.
:raises: InvalidParameterValue if required DRAC credentials are
missing.
:raises: DracOperationError on an error from python-dracclient.
"""
- # TODO(rloo): Support timeouts!
- if timeout is not None:
- LOG.warning(
- "The 'idrac' Power Interface's 'set_power_state' method "
- "doesn't support the 'timeout' parameter. Ignoring "
- "timeout=%(timeout)s",
- {'timeout': timeout})
-
- _set_power_state(task.node, power_state)
+ _set_power_state(task, power_state, timeout)
@METRICS.timer('DracPower.reboot')
@task_manager.require_exclusive_lock
@@ -240,14 +250,7 @@ class DracWSManPower(base.PowerInterface):
missing.
:raises: DracOperationError on an error from python-dracclient.
"""
- # TODO(rloo): Support timeouts!
- if timeout is not None:
- LOG.warning("The 'idrac' Power Interface's 'reboot' method "
- "doesn't support the 'timeout' parameter. Ignoring "
- "timeout=%(timeout)s",
- {'timeout': timeout})
-
- _set_power_state(task.node, states.REBOOT)
+ _set_power_state(task, states.REBOOT, timeout)
class DracPower(DracWSManPower):
diff --git a/ironic/tests/unit/drivers/modules/drac/test_power.py b/ironic/tests/unit/drivers/modules/drac/test_power.py
index aeb3c038e..b6ba3e159 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_power.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_power.py
@@ -72,7 +72,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
def test_set_power_state(self, mock_log, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
-
+ mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
+ drac_constants.POWER_OFF]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
task.driver.power.set_power_state(task, states.POWER_OFF)
@@ -98,6 +99,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
def test_set_power_state_timeout(self, mock_log, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
+ mock_client.get_power_state.side_effect = [drac_constants.POWER_ON,
+ drac_constants.POWER_OFF]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
@@ -106,7 +109,7 @@ class DracPowerTestCase(test_utils.BaseDracTest):
drac_power_state = drac_power.REVERSE_POWER_STATES[states.POWER_OFF]
mock_client.set_power_state.assert_called_once_with(drac_power_state)
- self.assertTrue(mock_log.called)
+ self.assertFalse(mock_log.called)
@mock.patch.object(drac_power.LOG, 'warning', autospec=True)
def test_reboot_while_powered_on(self, mock_log, mock_get_drac_client):
@@ -137,7 +140,8 @@ class DracPowerTestCase(test_utils.BaseDracTest):
def test_reboot_while_powered_off(self, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
- mock_client.get_power_state.return_value = drac_constants.POWER_OFF
+ mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
+ drac_constants.POWER_ON]
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
@@ -149,7 +153,9 @@ class DracPowerTestCase(test_utils.BaseDracTest):
@mock.patch('time.sleep', autospec=True)
def test_reboot_retries_success(self, mock_sleep, mock_get_drac_client):
mock_client = mock_get_drac_client.return_value
- mock_client.get_power_state.return_value = drac_constants.POWER_OFF
+ mock_client.get_power_state.side_effect = [drac_constants.POWER_OFF,
+ drac_constants.POWER_OFF,
+ drac_constants.POWER_ON]
exc = drac_exceptions.DRACOperationFailed(
drac_messages=['The command failed to set RequestedState'])
mock_client.set_power_state.side_effect = [exc, None]
diff --git a/releasenotes/notes/idrac-wsman-set-power-state-wait-cd8f9ff41b19c7a7.yaml b/releasenotes/notes/idrac-wsman-set-power-state-wait-cd8f9ff41b19c7a7.yaml
new file mode 100644
index 000000000..4ac895625
--- /dev/null
+++ b/releasenotes/notes/idrac-wsman-set-power-state-wait-cd8f9ff41b19c7a7.yaml
@@ -0,0 +1,10 @@
+---
+fixes:
+ - |
+ Fixes ``idrac-wsman`` power interface to wait for the hardware to reach the
+ target state before returning. For systems where soft power off at the end
+ of deployment to boot to instance failed and forced hard power off was
+ used, this left node successfully deployed in off state without any errors.
+ This broke other workflows expecting node to be on booted into
+ OS at the end of deployment. Additional information can be found in
+ `story 2009204 <https://storyboard.openstack.org/#!/story/2009204>`_.