summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2020-05-29 10:53:15 +0000
committerGerrit Code Review <review@openstack.org>2020-05-29 10:53:15 +0000
commit911bc51a103b98621a803a84b00a96c09a610ac8 (patch)
tree2b00cbb5b53b60f68b88f33b74a25652d589f658
parent64fcd40406ba3ffec1cdb84cd796eca09a3246ee (diff)
parentf2eac72e888595615bd0fd79648c538bb889be5f (diff)
downloadironic-911bc51a103b98621a803a84b00a96c09a610ac8.tar.gz
Merge "redfish: handle hardware that is unable to set persistent boot" into stable/train
-rw-r--r--ironic/common/utils.py28
-rw-r--r--ironic/drivers/modules/redfish/management.py107
-rw-r--r--ironic/drivers/modules/redfish/power.py11
-rw-r--r--ironic/tests/unit/drivers/modules/redfish/test_management.py82
-rw-r--r--ironic/tests/unit/drivers/modules/redfish/test_power.py36
-rw-r--r--releasenotes/notes/redfish-sadness-6e2a37b3f45ef1aa.yaml18
6 files changed, 258 insertions, 24 deletions
diff --git a/ironic/common/utils.py b/ironic/common/utils.py
index ee34041f0..037ec78cd 100644
--- a/ironic/common/utils.py
+++ b/ironic/common/utils.py
@@ -537,3 +537,31 @@ def validate_conductor_group(conductor_group):
raise exception.InvalidConductorGroup(group=conductor_group)
if not re.match(r'^[a-zA-Z0-9_\-\.]*$', conductor_group):
raise exception.InvalidConductorGroup(group=conductor_group)
+
+
+def set_node_nested_field(node, collection, field, value):
+ """Set a value in a dictionary field of a node.
+
+ :param node: Node object.
+ :param collection: Name of the field with the dictionary.
+ :param field: Nested field name.
+ :param value: New value.
+ """
+ col = getattr(node, collection)
+ col[field] = value
+ setattr(node, collection, col)
+
+
+def pop_node_nested_field(node, collection, field, default=None):
+ """Pop a value from a dictionary field of a node.
+
+ :param node: Node object.
+ :param collection: Name of the field with the dictionary.
+ :param field: Nested field name.
+ :param default: The default value to return.
+ :return: The removed value or the default.
+ """
+ col = getattr(node, collection)
+ result = col.pop(field, default)
+ setattr(node, collection, col)
+ return result
diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py
index 32f6b8094..1b238bd60 100644
--- a/ironic/drivers/modules/redfish/management.py
+++ b/ironic/drivers/modules/redfish/management.py
@@ -22,6 +22,7 @@ from ironic.common import boot_devices
from ironic.common import boot_modes
from ironic.common import exception
from ironic.common.i18n import _
+from ironic.common import utils
from ironic.conductor import task_manager
from ironic.drivers import base
from ironic.drivers.modules.redfish import utils as redfish_utils
@@ -56,6 +57,63 @@ if sushy:
BOOT_DEVICE_PERSISTENT_MAP.items()}
+def _set_boot_device(task, system, device,
+ enabled=sushy.BOOT_SOURCE_ENABLED_ONCE):
+ """An internal routine to set the boot device.
+
+ :param task: a task from TaskManager.
+ :param system: a Redfish System object.
+ :param device: the Redfish boot device.
+ :param enabled: Redfish boot device persistence value.
+ :raises: SushyError on an error from the Sushy library
+ """
+ desired_enabled = enabled
+ current_enabled = system.boot.get('enabled')
+
+ # NOTE(etingof): this can be racy, esp if BMC is not RESTful
+ new_enabled = (desired_enabled
+ if desired_enabled != current_enabled else None)
+
+ use_new_set_system_boot = True
+
+ try:
+ try:
+ system.set_system_boot_options(device, enabled=new_enabled)
+ except AttributeError:
+ new_enabled = enabled
+ use_new_set_system_boot = False
+ system.set_system_boot_source(device, enabled=new_enabled)
+ except sushy.exceptions.SushyError as e:
+ if new_enabled == sushy.BOOT_SOURCE_ENABLED_CONTINUOUS:
+ # NOTE(dtantsur): continuous boot device settings have been
+ # removed from Redfish, and some vendors stopped supporting
+ # it before an alternative was provided. As a work around,
+ # use one-time boot and restore the boot device on every
+ # reboot via RedfishPower.
+ LOG.debug('Error %(error)s when trying to set a '
+ 'persistent boot device on node %(node)s, '
+ 'falling back to one-time boot settings',
+ {'error': e, 'node': task.node.uuid})
+
+ if use_new_set_system_boot:
+ system.set_system_boot_options(
+ device, enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
+ else:
+ system.set_system_boot_source(
+ device, enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
+
+ LOG.warning('Could not set persistent boot device to '
+ '%(dev)s for node %(node)s, using one-time '
+ 'boot device instead',
+ {'dev': device, 'node': task.node.uuid})
+ utils.set_node_nested_field(
+ task.node, 'driver_internal_info',
+ 'redfish_boot_device', device)
+ task.node.save()
+ else:
+ raise
+
+
class RedfishManagement(base.ManagementInterface):
def __init__(self):
@@ -96,6 +154,33 @@ class RedfishManagement(base.ManagementInterface):
return list(BOOT_DEVICE_MAP_REV)
@task_manager.require_exclusive_lock
+ def restore_boot_device(self, task, system):
+ """Restore boot device if needed.
+
+ Checks the redfish_boot_device internal flag and sets the one-time
+ boot device accordingly. A warning is issued if it fails.
+
+ This method is supposed to be called from the Redfish power interface
+ and should be considered private to the Redfish hardware type.
+
+ :param task: a task from TaskManager.
+ :param system: a Redfish System object.
+ """
+ device = task.node.driver_internal_info.get('redfish_boot_device')
+ if not device:
+ return
+
+ LOG.debug('Restoring boot device %(dev)s on node %(node)s',
+ {'dev': device, 'node': task.node.uuid})
+ try:
+ _set_boot_device(task, system, device)
+ except sushy.exceptions.SushyError as e:
+ LOG.warning('Unable to recover boot device %(dev)s for node '
+ '%(node)s, relying on the pre-configured boot order. '
+ 'Error: %(error)s',
+ {'dev': device, 'node': task.node.uuid, 'error': e})
+
+ @task_manager.require_exclusive_lock
def set_boot_device(self, task, device, persistent=False):
"""Set the boot device for a node.
@@ -112,24 +197,16 @@ class RedfishManagement(base.ManagementInterface):
:raises: RedfishConnectionError when it fails to connect to Redfish
:raises: RedfishError on an error from the Sushy library
"""
- system = redfish_utils.get_system(task.node)
+ utils.pop_node_nested_field(
+ task.node, 'driver_internal_info', 'redfish_boot_device')
+ task.node.save()
- desired_persistence = BOOT_DEVICE_PERSISTENT_MAP_REV[persistent]
- current_persistence = system.boot.get('enabled')
-
- # NOTE(etingof): this can be racy, esp if BMC is not RESTful
- enabled = (desired_persistence
- if desired_persistence != current_persistence else None)
+ system = redfish_utils.get_system(task.node)
try:
- try:
- system.set_system_boot_options(
- BOOT_DEVICE_MAP_REV[device], enabled=enabled)
- except AttributeError:
- system.set_system_boot_source(
- BOOT_DEVICE_MAP_REV[device],
- enabled=BOOT_DEVICE_PERSISTENT_MAP_REV[persistent])
-
+ _set_boot_device(
+ task, system, BOOT_DEVICE_MAP_REV[device],
+ enabled=BOOT_DEVICE_PERSISTENT_MAP_REV[persistent])
except sushy.exceptions.SushyError as e:
error_msg = (_('Redfish set boot device failed for node '
'%(node)s. Error: %(error)s') %
diff --git a/ironic/drivers/modules/redfish/power.py b/ironic/drivers/modules/redfish/power.py
index cb944a427..0a6f3f338 100644
--- a/ironic/drivers/modules/redfish/power.py
+++ b/ironic/drivers/modules/redfish/power.py
@@ -22,6 +22,7 @@ from ironic.common import states
from ironic.conductor import task_manager
from ironic.conductor import utils as cond_utils
from ironic.drivers import base
+from ironic.drivers.modules.redfish import management as redfish_mgmt
from ironic.drivers.modules.redfish import utils as redfish_utils
LOG = log.getLogger(__name__)
@@ -123,6 +124,12 @@ class RedfishPower(base.PowerInterface):
:raises: RedfishError on an error from the Sushy library
"""
system = redfish_utils.get_system(task.node)
+
+ if (power_state in (states.POWER_ON, states.SOFT_REBOOT, states.REBOOT)
+ and isinstance(task.driver.management,
+ redfish_mgmt.RedfishManagement)):
+ task.driver.management.restore_boot_device(task, system)
+
try:
_set_power_state(task, system, power_state, timeout=timeout)
except sushy.exceptions.SushyError as e:
@@ -151,6 +158,10 @@ class RedfishPower(base.PowerInterface):
next_state = states.POWER_OFF
_set_power_state(task, system, next_state, timeout=timeout)
+ if isinstance(task.driver.management,
+ redfish_mgmt.RedfishManagement):
+ task.driver.management.restore_boot_device(task, system)
+
next_state = states.POWER_ON
_set_power_state(task, system, next_state, timeout=timeout)
except sushy.exceptions.SushyError as e:
diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py
index bf9887f5c..9dc0a5c65 100644
--- a/ironic/tests/unit/drivers/modules/redfish/test_management.py
+++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py
@@ -118,6 +118,8 @@ class RedfishManagementTestCase(db_base.DbTestCase):
fake_system.set_system_boot_source.assert_called_once_with(
expected, enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
mock_get_system.assert_called_once_with(task.node)
+ self.assertNotIn('redfish_boot_device',
+ task.node.driver_internal_info)
# Reset mocks
fake_system.set_system_boot_options.reset_mock()
@@ -141,6 +143,8 @@ class RedfishManagementTestCase(db_base.DbTestCase):
fake_system.set_system_boot_options.assert_called_once_with(
sushy.BOOT_SOURCE_TARGET_PXE, enabled=expected)
mock_get_system.assert_called_once_with(task.node)
+ self.assertNotIn('redfish_boot_device',
+ task.node.driver_internal_info)
# Reset mocks
fake_system.set_system_boot_options.reset_mock()
@@ -188,6 +192,84 @@ class RedfishManagementTestCase(db_base.DbTestCase):
sushy.BOOT_SOURCE_TARGET_PXE,
enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
mock_get_system.assert_called_once_with(task.node)
+ self.assertNotIn('redfish_boot_device',
+ task.node.driver_internal_info)
+
+ @mock.patch.object(sushy, 'Sushy', autospec=True)
+ @mock.patch.object(redfish_utils, 'get_system', autospec=True)
+ def test_set_boot_device_persistence_fallback(self, mock_get_system,
+ mock_sushy):
+ fake_system = mock.Mock()
+ fake_system.set_system_boot_options.side_effect = [
+ sushy.exceptions.SushyError(),
+ None,
+ ]
+ mock_get_system.return_value = fake_system
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.driver.management.set_boot_device(
+ task, boot_devices.PXE, persistent=True)
+ fake_system.set_system_boot_options.assert_has_calls([
+ mock.call(sushy.BOOT_SOURCE_TARGET_PXE,
+ enabled=sushy.BOOT_SOURCE_ENABLED_CONTINUOUS),
+ mock.call(sushy.BOOT_SOURCE_TARGET_PXE,
+ enabled=sushy.BOOT_SOURCE_ENABLED_ONCE),
+ ])
+ mock_get_system.assert_called_once_with(task.node)
+
+ task.node.refresh()
+ self.assertEqual(
+ sushy.BOOT_SOURCE_TARGET_PXE,
+ task.node.driver_internal_info['redfish_boot_device'])
+
+ def test_restore_boot_device(self):
+ fake_system = mock.Mock()
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.node.driver_internal_info['redfish_boot_device'] = (
+ sushy.BOOT_SOURCE_TARGET_HDD
+ )
+
+ task.driver.management.restore_boot_device(task, fake_system)
+
+ fake_system.set_system_boot_options.assert_called_once_with(
+ sushy.BOOT_SOURCE_TARGET_HDD,
+ enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
+ # The stored boot device is kept intact
+ self.assertEqual(
+ sushy.BOOT_SOURCE_TARGET_HDD,
+ task.node.driver_internal_info['redfish_boot_device'])
+
+ def test_restore_boot_device_noop(self):
+ fake_system = mock.Mock()
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.driver.management.restore_boot_device(task, fake_system)
+
+ self.assertFalse(fake_system.set_system_boot_options.called)
+
+ @mock.patch.object(redfish_mgmt.LOG, 'warning', autospec=True)
+ def test_restore_boot_device_failure(self, mock_log):
+ fake_system = mock.Mock()
+ fake_system.set_system_boot_options.side_effect = (
+ sushy.exceptions.SushyError()
+ )
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=False) as task:
+ task.node.driver_internal_info['redfish_boot_device'] = (
+ sushy.BOOT_SOURCE_TARGET_HDD
+ )
+
+ task.driver.management.restore_boot_device(task, fake_system)
+
+ fake_system.set_system_boot_options.assert_called_once_with(
+ sushy.BOOT_SOURCE_TARGET_HDD,
+ enabled=sushy.BOOT_SOURCE_ENABLED_ONCE)
+ self.assertTrue(mock_log.called)
+ # The stored boot device is kept intact
+ self.assertEqual(
+ sushy.BOOT_SOURCE_TARGET_HDD,
+ task.node.driver_internal_info['redfish_boot_device'])
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
def test_get_boot_device(self, mock_get_system):
diff --git a/ironic/tests/unit/drivers/modules/redfish/test_power.py b/ironic/tests/unit/drivers/modules/redfish/test_power.py
index 031ca42f1..bbbd57184 100644
--- a/ironic/tests/unit/drivers/modules/redfish/test_power.py
+++ b/ironic/tests/unit/drivers/modules/redfish/test_power.py
@@ -19,6 +19,7 @@ from oslo_utils import importutils
from ironic.common import exception
from ironic.common import states
from ironic.conductor import task_manager
+from ironic.drivers.modules.redfish import management as redfish_mgmt
from ironic.drivers.modules.redfish import power as redfish_power
from ironic.drivers.modules.redfish import utils as redfish_utils
from ironic.tests.unit.db import base as db_base
@@ -82,19 +83,21 @@ class RedfishPowerTestCase(db_base.DbTestCase):
mock_get_system.assert_called_once_with(task.node)
mock_get_system.reset_mock()
+ @mock.patch.object(redfish_mgmt.RedfishManagement, 'restore_boot_device',
+ autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
- def test_set_power_state(self, mock_get_system):
+ def test_set_power_state(self, mock_get_system, mock_restore_bootdev):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
expected_values = [
- (states.POWER_ON, sushy.RESET_ON),
- (states.POWER_OFF, sushy.RESET_FORCE_OFF),
- (states.REBOOT, sushy.RESET_FORCE_RESTART),
- (states.SOFT_REBOOT, sushy.RESET_GRACEFUL_RESTART),
- (states.SOFT_POWER_OFF, sushy.RESET_GRACEFUL_SHUTDOWN)
+ (states.POWER_ON, sushy.RESET_ON, True),
+ (states.POWER_OFF, sushy.RESET_FORCE_OFF, False),
+ (states.REBOOT, sushy.RESET_FORCE_RESTART, True),
+ (states.SOFT_REBOOT, sushy.RESET_GRACEFUL_RESTART, True),
+ (states.SOFT_POWER_OFF, sushy.RESET_GRACEFUL_SHUTDOWN, False)
]
- for target, expected in expected_values:
+ for target, expected, restore_bootdev in expected_values:
if target in (states.POWER_OFF, states.SOFT_POWER_OFF):
final = sushy.SYSTEM_POWER_STATE_OFF
transient = sushy.SYSTEM_POWER_STATE_ON
@@ -113,9 +116,15 @@ class RedfishPowerTestCase(db_base.DbTestCase):
system_result[0].reset_system.assert_called_once_with(expected)
mock_get_system.assert_called_with(task.node)
self.assertEqual(4, mock_get_system.call_count)
+ if restore_bootdev:
+ mock_restore_bootdev.assert_called_once_with(
+ task.driver.management, task, system_result[0])
+ else:
+ self.assertFalse(mock_restore_bootdev.called)
# Reset mocks
mock_get_system.reset_mock()
+ mock_restore_bootdev.reset_mock()
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
def test_set_power_state_not_reached(self, mock_get_system):
@@ -164,8 +173,11 @@ class RedfishPowerTestCase(db_base.DbTestCase):
sushy.RESET_ON)
mock_get_system.assert_called_once_with(task.node)
+ @mock.patch.object(redfish_mgmt.RedfishManagement, 'restore_boot_device',
+ autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
- def test_reboot_from_power_off(self, mock_get_system):
+ def test_reboot_from_power_off(self, mock_get_system,
+ mock_restore_bootdev):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
system_result = [
@@ -185,9 +197,13 @@ class RedfishPowerTestCase(db_base.DbTestCase):
sushy.RESET_ON)
mock_get_system.assert_called_with(task.node)
self.assertEqual(3, mock_get_system.call_count)
+ mock_restore_bootdev.assert_called_once_with(
+ task.driver.management, task, system_result[0])
+ @mock.patch.object(redfish_mgmt.RedfishManagement, 'restore_boot_device',
+ autospec=True)
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
- def test_reboot_from_power_on(self, mock_get_system):
+ def test_reboot_from_power_on(self, mock_get_system, mock_restore_bootdev):
with task_manager.acquire(self.context, self.node.uuid,
shared=False) as task:
system_result = [
@@ -209,6 +225,8 @@ class RedfishPowerTestCase(db_base.DbTestCase):
])
mock_get_system.assert_called_with(task.node)
self.assertEqual(3, mock_get_system.call_count)
+ mock_restore_bootdev.assert_called_once_with(
+ task.driver.management, task, system_result[0])
@mock.patch.object(redfish_utils, 'get_system', autospec=True)
def test_reboot_not_reached(self, mock_get_system):
diff --git a/releasenotes/notes/redfish-sadness-6e2a37b3f45ef1aa.yaml b/releasenotes/notes/redfish-sadness-6e2a37b3f45ef1aa.yaml
new file mode 100644
index 000000000..a8cb499be
--- /dev/null
+++ b/releasenotes/notes/redfish-sadness-6e2a37b3f45ef1aa.yaml
@@ -0,0 +1,18 @@
+---
+fixes:
+ - |
+ Provides a workaround for hardware that does not support persistent boot
+ device setting with the ``redfish`` hardware type. When such situation is
+ detected, ironic will fall back to one-time boot device setting, restoring
+ it on every reboot.
+issues:
+ - |
+ Some redfish-enabled hardware is known not to support persistent boot
+ device setting that is used by the Bare Metal service for deployed
+ instances. The ``redfish`` hardware type tries to work around this problem,
+ but rebooting such an instance in-band may cause it to boot incorrectly.
+ A predictable boot order should be configured in the node's boot firmware
+ to avoid issues and at least metadata cleaning must be enabled.
+ See `this mailing list thread
+ <http://lists.openstack.org/pipermail/openstack-discuss/2020-April/014543.html>`_
+ for technical details.