summaryrefslogtreecommitdiff
path: root/ironic/drivers/modules/redfish/management.py
diff options
context:
space:
mode:
Diffstat (limited to 'ironic/drivers/modules/redfish/management.py')
-rw-r--r--ironic/drivers/modules/redfish/management.py337
1 files changed, 337 insertions, 0 deletions
diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py
index 22ef03b49..e4fec1a2e 100644
--- a/ironic/drivers/modules/redfish/management.py
+++ b/ironic/drivers/modules/redfish/management.py
@@ -15,8 +15,11 @@
import collections
+from futurist import periodics
+from ironic_lib import metrics_utils
from oslo_log import log
from oslo_utils import importutils
+from oslo_utils import timeutils
from ironic.common import boot_devices
from ironic.common import boot_modes
@@ -24,12 +27,17 @@ from ironic.common import components
from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import indicator_states
+from ironic.common import states
from ironic.common import utils
from ironic.conductor import task_manager
+from ironic.conductor import utils as manager_utils
+from ironic.conf import CONF
from ironic.drivers import base
+from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.redfish import utils as redfish_utils
LOG = log.getLogger(__name__)
+METRICS = metrics_utils.get_metrics_logger(__name__)
sushy = importutils.try_import('sushy')
@@ -663,3 +671,332 @@ class RedfishManagement(base.ManagementInterface):
"node %(uuid)s") % {'indicator': indicator,
'component': component,
'uuid': task.node.uuid})
+
+ @METRICS.timer('RedfishManagement.update_firmware')
+ @base.clean_step(priority=0, abortable=False, argsinfo={
+ 'firmware_images': {
+ 'description': (
+ 'A list of firmware images to apply.'
+ ),
+ 'required': True
+ }})
+ def update_firmware(self, task, firmware_images):
+ """Updates the firmware on the node.
+
+ :param task: a TaskManager instance containing the node to act on.
+ :param firmware_images: A list of firmware images are to apply.
+ :returns: None if it is completed.
+ :raises: RedfishError on an error from the Sushy library.
+ """
+ node = task.node
+
+ LOG.debug('Updating firmware on node %(node_uuid)s with firmware '
+ '%(firmware_images)s',
+ {'node_uuid': node.uuid,
+ 'firmware_images': firmware_images})
+
+ update_service = redfish_utils.get_update_service(task.node)
+
+ # The cleaning infrastructure has an exclusive lock on the node, so
+ # there is no need to get one here.
+ self._apply_firmware_update(node, update_service, firmware_images)
+
+ # set_async_step_flags calls node.save()
+ deploy_utils.set_async_step_flags(
+ node,
+ reboot=True,
+ skip_current_step=True,
+ polling=True)
+
+ manager_utils.node_power_action(task, states.REBOOT)
+
+ return deploy_utils.get_async_step_return_state(task.node)
+
+ def _apply_firmware_update(self, node, update_service, firmware_updates):
+ """Applies the next firmware update to the node
+
+ Applies the first firmware update in the firmware_updates list to
+ the node.
+
+ Note that the caller must have an exclusive lock on the node and
+ the caller must ensure node.save() is called after making this
+ call.
+
+ :param node: the node to apply the next update to
+ :param update_service: the sushy firmware update service
+ :param firmware_updates: the remaining firmware updates to apply
+ """
+
+ firmware_update = firmware_updates[0]
+ firmware_url = firmware_update['url']
+
+ LOG.debug('Applying firmware %(firmware_image)s to node '
+ '%(node_uuid)s',
+ {'firmware_image': firmware_url,
+ 'node_uuid': node.uuid})
+
+ task_monitor = update_service.simple_update(firmware_url)
+
+ driver_internal_info = node.driver_internal_info
+ firmware_update['task_monitor'] = task_monitor.task_monitor
+ driver_internal_info['firmware_updates'] = firmware_updates
+ node.driver_internal_info = driver_internal_info
+
+ def _continue_firmware_updates(self, task, update_service,
+ firmware_updates):
+ """Continues processing the firmware updates
+
+ Continues to process the firmware updates on the node.
+
+ Note that the caller must have an exclusive lock on the node.
+
+ :param task: a TaskManager instance containing the node to act on.
+ :param update_service: the sushy firmware update service
+ :param firmware_updates: the remaining firmware updates to apply
+ """
+
+ node = task.node
+ firmware_update = firmware_updates[0]
+ wait_interval = firmware_update.get('wait')
+ if wait_interval:
+ time_now = str(timeutils.utcnow().isoformat())
+ firmware_update['wait_start_time'] = time_now
+
+ LOG.debug('Waiting at %(time)s for %(seconds)s seconds after '
+ 'firmware update %(firmware_image)s on node %(node)s',
+ {'time': time_now,
+ 'seconds': wait_interval,
+ 'firmware_image': firmware_update['url'],
+ 'node': node.uuid})
+
+ driver_internal_info = node.driver_internal_info
+ driver_internal_info['firmware_updates'] = firmware_updates
+ node.driver_internal_info = driver_internal_info
+ node.save()
+ return
+
+ if len(firmware_updates) == 1:
+ self._clear_firmware_updates(node)
+
+ LOG.info('Firmware updates completed for node %(node)s',
+ {'node': node.uuid})
+
+ manager_utils.notify_conductor_resume_clean(task)
+ else:
+ firmware_updates.pop(0)
+ self._apply_firmware_update(node,
+ update_service,
+ firmware_updates)
+ node.save()
+ manager_utils.node_power_action(task, states.REBOOT)
+
+ def _clear_firmware_updates(self, node):
+ """Clears firmware updates from driver_internal_info
+
+ Note that the caller must have an exclusive lock on the node.
+
+ :param node: the node to clear the firmware updates from
+ """
+ driver_internal_info = node.driver_internal_info
+ driver_internal_info.pop('firmware_updates', None)
+ node.driver_internal_info = driver_internal_info
+ node.save()
+
+ @METRICS.timer('RedfishManagement._query_firmware_update_failed')
+ @periodics.periodic(
+ spacing=CONF.redfish.firmware_update_fail_interval,
+ enabled=CONF.redfish.firmware_update_fail_interval > 0)
+ def _query_firmware_update_failed(self, manager, context):
+ """Periodic job to check for failed firmware updates."""
+
+ filters = {'reserved': False, 'provision_state': states.CLEANFAIL,
+ 'maintenance': True}
+
+ fields = ['driver_internal_info']
+
+ node_list = manager.iter_nodes(fields=fields, filters=filters)
+ for (node_uuid, driver, conductor_group,
+ driver_internal_info) in node_list:
+ try:
+ lock_purpose = 'checking async firmware update failed.'
+ with task_manager.acquire(context, node_uuid,
+ purpose=lock_purpose,
+ shared=True) as task:
+ if not isinstance(task.driver.management,
+ RedfishManagement):
+ continue
+
+ firmware_updates = driver_internal_info.get(
+ 'firmware_updates')
+ if not firmware_updates:
+ continue
+
+ node = task.node
+
+ # A firmware update failed. Discard any remaining firmware
+ # updates so when the user takes the node out of
+ # maintenance mode, pending firmware updates do not
+ # automatically continue.
+ LOG.warning('Firmware update failed for node %(node)s. '
+ 'Discarding remaining firmware updates.',
+ {'node': node.uuid})
+
+ task.upgrade_lock()
+ self._clear_firmware_updates(node)
+
+ except exception.NodeNotFound:
+ LOG.info('During _query_firmware_update_failed, node '
+ '%(node)s was not found and presumed deleted by '
+ 'another process.', {'node': node_uuid})
+ except exception.NodeLocked:
+ LOG.info('During _query_firmware_update_failed, node '
+ '%(node)s was already locked by another process. '
+ 'Skip.', {'node': node_uuid})
+
+ @METRICS.timer('RedfishManagement._query_firmware_update_status')
+ @periodics.periodic(
+ spacing=CONF.redfish.firmware_update_status_interval,
+ enabled=CONF.redfish.firmware_update_status_interval > 0)
+ def _query_firmware_update_status(self, manager, context):
+ """Periodic job to check firmware update tasks."""
+
+ filters = {'reserved': False, 'provision_state': states.CLEANWAIT}
+ fields = ['driver_internal_info']
+
+ node_list = manager.iter_nodes(fields=fields, filters=filters)
+ for (node_uuid, driver, conductor_group,
+ driver_internal_info) in node_list:
+ try:
+ lock_purpose = 'checking async firmware update tasks.'
+ with task_manager.acquire(context, node_uuid,
+ purpose=lock_purpose,
+ shared=True) as task:
+ if not isinstance(task.driver.management,
+ RedfishManagement):
+ continue
+
+ firmware_updates = driver_internal_info.get(
+ 'firmware_updates')
+ if not firmware_updates:
+ continue
+
+ self._check_node_firmware_update(task)
+
+ except exception.NodeNotFound:
+ LOG.info('During _query_firmware_update_status, node '
+ '%(node)s was not found and presumed deleted by '
+ 'another process.', {'node': node_uuid})
+ except exception.NodeLocked:
+ LOG.info('During _query_firmware_update_status, node '
+ '%(node)s was already locked by another process. '
+ 'Skip.', {'node': node_uuid})
+
+ @METRICS.timer('RedfishManagement._check_node_firmware_update')
+ def _check_node_firmware_update(self, task):
+ """Check the progress of running firmware update on a node."""
+
+ node = task.node
+
+ firmware_updates = node.driver_internal_info['firmware_updates']
+ current_update = firmware_updates[0]
+
+ try:
+ update_service = redfish_utils.get_update_service(node)
+ except exception.RedfishConnectionError as e:
+ # If the BMC firmware is being updated, the BMC will be
+ # unavailable for some amount of time.
+ LOG.warning('Unable to communicate with firmware update service '
+ 'on node %(node)s. Will try again on the next poll. '
+ 'Error: %(error)s',
+ {'node': node.uuid,
+ 'error': e})
+ return
+
+ wait_start_time = current_update.get('wait_start_time')
+ if wait_start_time:
+ wait_start = timeutils.parse_isotime(wait_start_time)
+
+ elapsed_time = timeutils.utcnow(True) - wait_start
+ if elapsed_time.seconds >= current_update['wait']:
+ LOG.debug('Finished waiting after firmware update '
+ '%(firmware_image)s on node %(node)s. '
+ 'Elapsed time: %(seconds)s seconds',
+ {'firmware_image': current_update['url'],
+ 'node': node.uuid,
+ 'seconds': elapsed_time.seconds})
+ current_update.pop('wait', None)
+ current_update.pop('wait_start_time', None)
+
+ task.upgrade_lock()
+ self._continue_firmware_updates(task,
+ update_service,
+ firmware_updates)
+ else:
+ LOG.debug('Continuing to wait after firmware update '
+ '%(firmware_image)s on node %(node)s. '
+ 'Elapsed time: %(seconds)s seconds',
+ {'firmware_image': current_update['url'],
+ 'node': node.uuid,
+ 'seconds': elapsed_time.seconds})
+
+ return
+
+ try:
+ task_monitor = update_service.get_task_monitor(
+ current_update['task_monitor'])
+ except sushy.exceptions.ResourceNotFoundError:
+ # The BMC deleted the Task before we could query it
+ LOG.warning('Firmware update completed for node %(node)s, '
+ 'firmware %(firmware_image)s, but success of the '
+ 'update is unknown. Assuming update was successful.',
+ {'node': node.uuid,
+ 'firmware_image': current_update['url']})
+ task.upgrade_lock()
+ self._continue_firmware_updates(task,
+ update_service,
+ firmware_updates)
+ return
+
+ if not task_monitor.is_processing:
+ # The last response does not necessarily contain a Task,
+ # so get it
+ sushy_task = task_monitor.get_task()
+
+ # Only parse the messages if the BMC did not return parsed
+ # messages
+ messages = []
+ if not sushy_task.messages[0].message:
+ sushy_task.parse_messages()
+
+ messages = [m.message for m in sushy_task.messages]
+
+ if (sushy_task.task_state == sushy.TASK_STATE_COMPLETED
+ and sushy_task.task_status in
+ [sushy.HEALTH_OK, sushy.HEALTH_WARNING]):
+ LOG.info('Firmware update succeeded for node %(node)s, '
+ 'firmware %(firmware_image)s: %(messages)s',
+ {'node': node.uuid,
+ 'firmware_image': current_update['url'],
+ 'messages': ", ".join(messages)})
+
+ task.upgrade_lock()
+ self._continue_firmware_updates(task,
+ update_service,
+ firmware_updates)
+ else:
+ error_msg = (_('Firmware update failed for node %(node)s, '
+ 'firmware %(firmware_image)s. '
+ 'Error: %(errors)s') %
+ {'node': node.uuid,
+ 'firmware_image': current_update['url'],
+ 'errors': ", ".join(messages)})
+ LOG.error(error_msg)
+
+ task.upgrade_lock()
+ self._clear_firmware_updates(node)
+ manager_utils.cleaning_error_handler(task, error_msg)
+ else:
+ LOG.debug('Firmware update in progress for node %(node)s, '
+ 'firmware %(firmware_image)s.',
+ {'node': node.uuid,
+ 'firmware_image': current_update['url']})