summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob Anders <janders@redhat.com>2021-07-08 19:49:49 +1000
committerJacob Anders <janders@redhat.com>2021-09-30 20:46:17 +1000
commitb385d9ae5bda45d683e51d0140be7c0b2c1d6aca (patch)
tree814a338c13b667431596cc474006b3ae7ed8b98e
parent62439d5633ced36f949b9c0bbf340f8b791613d6 (diff)
downloadironic-b385d9ae5bda45d683e51d0140be7c0b2c1d6aca.tar.gz
Add support for verify steps
This change adds support for verify steps in Ironic. Verify steps allow executing actions on transition from "verifying" to "managable" state and can perform actions such as cleaning BMC job queue or resetting the BMC on supported platforms. Verify steps are similar to deploy and clean steps, just simpler. Story: 2009025 Task: 42751 Change-Id: Iee27199a0315b8609e629bac272998c28274802b
-rw-r--r--ironic/common/exception.py4
-rw-r--r--ironic/conductor/steps.py59
-rw-r--r--ironic/conductor/utils.py26
-rw-r--r--ironic/conductor/verify.py36
-rw-r--r--ironic/conf/conductor.py10
-rw-r--r--ironic/drivers/base.py89
-rw-r--r--ironic/tests/unit/conductor/test_steps.py161
-rw-r--r--releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml6
8 files changed, 389 insertions, 2 deletions
diff --git a/ironic/common/exception.py b/ironic/common/exception.py
index b3dcdc673..1f19d6e6b 100644
--- a/ironic/common/exception.py
+++ b/ironic/common/exception.py
@@ -830,3 +830,7 @@ class NodeHistoryNotFound(NotFound):
class IncorrectConfiguration(IronicException):
_msg_fmt = _("Supplied configuration is incorrect and must be fixed. "
"Error: %(error)s")
+
+
+class NodeVerifyFailure(IronicException):
+ _msg_fmt = _("Failed to verify node %(node)s: %(reason)s")
diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py
index 2a6636aca..05e07ced0 100644
--- a/ironic/conductor/steps.py
+++ b/ironic/conductor/steps.py
@@ -50,6 +50,25 @@ DEPLOYING_INTERFACE_PRIORITY = {
'raid': 1,
}
+VERIFYING_INTERFACE_PRIORITY = {
+ # When two verify steps have the same priority, their order is determined
+ # by which interface is implementing the verify step. The verifying step of
+ # the interface with the highest value here, will be executed first in
+ # that case.
+ 'power': 12,
+ 'management': 11,
+ 'boot': 8,
+ 'inspect': 10,
+ 'deploy': 9,
+ 'bios': 7,
+ 'raid': 6,
+ 'vendor': 5,
+ 'network': 4,
+ 'storage': 3,
+ 'console': 2,
+ 'rescue': 1,
+}
+
def _clean_step_key(step):
"""Sort by priority, then interface priority in event of tie.
@@ -69,6 +88,15 @@ def _deploy_step_key(step):
DEPLOYING_INTERFACE_PRIORITY[step.get('interface')])
+def _verify_step_key(step):
+ """Sort by priority, then interface priority in event of tie.
+
+ :param step: verify step dict to get priority for.
+ """
+ return (step.get('priority'),
+ VERIFYING_INTERFACE_PRIORITY[step.get('interface')])
+
+
def _sorted_steps(steps, sort_step_key):
"""Return a sorted list of steps.
@@ -194,6 +222,37 @@ def _get_deployment_steps(task, enabled=False, sort=True):
enabled=enabled, sort_step_key=sort_key)
+def _get_verify_steps(task, enabled=False, sort=True):
+ """Get verify steps for task.node.
+
+ :param task: A TaskManager object
+ :param enabled: If True, returns only enabled (priority > 0) steps. If
+ False, returns all verify steps.
+ :param sort: If True, the steps are sorted from highest priority to lowest
+ priority. For steps having the same priority, they are sorted from
+ highest interface priority to lowest.
+ :raises: NodeVerifyFailure if there was a problem getting the
+ verify steps.
+ :returns: A list of verify step dictionaries
+ """
+ sort_key = _verify_step_key if sort else None
+ if CONF.conductor.verify_step_priority_override:
+ vsp_override = {}
+ for element in CONF.conductor.verify_step_priority_override:
+ vsp_override.update(element)
+
+ verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
+ 'get_verify_steps', enabled=enabled,
+ sort_step_key=sort_key,
+ prio_overrides=vsp_override)
+
+ else:
+ verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
+ 'get_verify_steps', enabled=enabled,
+ sort_step_key=sort_key)
+ return verify_steps
+
+
def set_node_cleaning_steps(task, disable_ramdisk=False):
"""Set up the node with clean step information for cleaning.
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 59b60a5da..fa63ba094 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -631,6 +631,32 @@ def fail_on_error(error_callback, msg, *error_args, **error_kwargs):
return wrapper
+def verifying_error_handler(task, logmsg, errmsg=None, traceback=False):
+
+ """Handle errors during verification steps
+
+ :param task: the task
+ :param logmsg: message to be logged
+ :param errmsg: message for the user
+ :param traceback: Boolean; True to log a traceback
+ """
+ errmsg = errmsg or logmsg
+ node = task.node
+ LOG.error(logmsg, exc_info=traceback)
+ node_history_record(node, event=errmsg, event_type=states.VERIFYING,
+ error=True)
+ node.save()
+
+ node.refresh()
+ if node.provision_state in (
+ states.VERIFYING):
+ # Clear verifying step; we leave the list of verify steps
+ # in node.driver_internal_info for debugging purposes.
+ node.verify_step = {}
+
+ node.save()
+
+
@task_manager.require_exclusive_lock
def abort_on_conductor_take_over(task):
"""Set node's state when a task was aborted due to conductor take over.
diff --git a/ironic/conductor/verify.py b/ironic/conductor/verify.py
index b611e7e4e..ee1b38784 100644
--- a/ironic/conductor/verify.py
+++ b/ironic/conductor/verify.py
@@ -18,6 +18,7 @@ from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.conductor import notification_utils as notify_utils
+from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import utils
@@ -49,6 +50,37 @@ def do_node_verify(task):
{'node': node.uuid, 'msg': e})
log_traceback = not isinstance(e, exception.IronicException)
LOG.error(error, exc_info=log_traceback)
+
+ verify_steps = conductor_steps._get_verify_steps(task,
+ enabled=True,
+ sort=True)
+ for step in verify_steps:
+ interface = getattr(task.driver, step.get('interface'))
+ LOG.info('Executing %(step)s on node %(node)s',
+ {'step': step, 'node': node.uuid})
+ try:
+ result = interface.execute_verify_step(task, step)
+ except Exception as e:
+ error = ('Node %(node)s failed verify step %(step)s '
+ 'with unexpected error: %(err)s' %
+ {'node': node.uuid, 'step': node.verify_step,
+ 'err': e})
+ utils.verifying_error_handler(
+ task, error,
+ _("Failed to verify. Exception: %s") % e,
+ traceback=True)
+ if result is not None:
+ # NOTE(rloo): This is an internal/dev error; shouldn't
+ # happen.
+ error = (_('While executing verify step %(step)s on '
+ 'node %(node)s, step returned unexpected '
+ 'state: %(val)s')
+ % {'step': step, 'node': node.uuid,
+ 'val': result})
+ utils.verifying_error_handler(
+ task, error,
+ _("Failed to verify: %s") % node.deploy_step)
+
if error is None:
# NOTE(janders) this can eventually move to driver-specific
# verify steps, will leave this for a follow-up change.
@@ -67,6 +99,10 @@ def do_node_verify(task):
else:
task.process_event('done')
+
+ LOG.info('Successfully verified node %(node)s',
+ {'node': node.uuid})
+
else:
utils.node_history_record(task.node, event=error,
event_type=states.VERIFY,
diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py
index 61d6b3247..a2d168fd0 100644
--- a/ironic/conf/conductor.py
+++ b/ironic/conf/conductor.py
@@ -334,6 +334,16 @@ opts = [
'node_history_max_entries setting as users of '
'this setting are anticipated to need to retain '
'history by policy.')),
+ cfg.MultiOpt('verify_step_priority_override',
+ item_type=types.Dict(),
+ default={},
+ mutable=True,
+ help=_('Priority to run automated verify steps '
+ 'provided in interface.step_name:priority format,'
+ 'e.g. management.clear_job_queue:123. The option can '
+ 'be specified multiple times to define priorities '
+ 'for multiple steps. If set to 0, this specific step '
+ 'will not run during cleaning. ')),
]
diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py
index 2b1700e38..bdd017b91 100644
--- a/ironic/drivers/base.py
+++ b/ironic/drivers/base.py
@@ -226,8 +226,9 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
def __new__(cls, *args, **kwargs):
- # Get the list of clean steps and deploy steps, when the interface is
- # initialized by the conductor. We use __new__ instead of __init___
+ # Get the list of clean steps, deploy steps and verify steps when the
+ # interface is initialized by the conductor.
+ # We use __new__ instead of __init___
# to avoid breaking backwards compatibility with all the drivers.
# We want to return all steps, regardless of priority.
@@ -238,6 +239,7 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
instance = super_new(cls, *args, **kwargs)
instance.clean_steps = []
instance.deploy_steps = []
+ instance.verify_steps = []
for n, method in inspect.getmembers(instance, inspect.ismethod):
if getattr(method, '_is_clean_step', False):
# Create a CleanStep to represent this method
@@ -256,6 +258,13 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'argsinfo': method._deploy_step_argsinfo,
'interface': instance.interface_type}
instance.deploy_steps.append(step)
+ if getattr(method, '_is_verify_step', False):
+ # Create a VerifyStep to represent this method
+ step = {'step': method.__name__,
+ 'priority': method._verify_step_priority,
+ 'interface': instance.interface_type}
+ instance.verify_steps.append(step)
+
if instance.clean_steps:
LOG.debug('Found clean steps %(steps)s for interface '
'%(interface)s',
@@ -266,6 +275,12 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'%(interface)s',
{'steps': instance.deploy_steps,
'interface': instance.interface_type})
+ if instance.verify_steps:
+ LOG.debug('Found verify steps %(steps)s for interface '
+ '%(interface)s',
+ {'steps': instance.deploy_steps,
+ 'interface': instance.interface_type})
+
return instance
def _execute_step(self, task, step):
@@ -360,6 +375,35 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
return self._execute_step(task, step)
+ def get_verify_steps(self, task):
+ """Get a list of (enabled and disabled) verify steps for the interface.
+
+ This function will return all verify steps (both enabled and disabled)
+ for the interface, in an unordered list.
+
+ :param task: A TaskManager object, useful for interfaces overriding
+ this function
+ :raises NodeVerifyFailure: if there is a problem getting the steps
+ from the driver. For example, when a node (using an agent driver)
+ has just been enrolled and the agent isn't alive yet to be queried
+ for the available verify steps.
+ :returns: A list of deploy step dictionaries
+ """
+ return self.verify_steps
+
+ def execute_verify_step(self, task, step):
+ """Execute the verify step on task.node.
+
+ A verify step must take a single positional argument: a TaskManager
+ object. It does not take keyword variable arguments.
+
+ :param task: A TaskManager object
+ :param step: The deploy step dictionary representing the step to
+ execute
+ :returns: None if this method has completed synchronously
+ """
+ return self._execute_step(task, step)
+
class DeployInterface(BaseInterface):
"""Interface for deploy-related actions."""
@@ -1868,3 +1912,44 @@ def deploy_step(priority, argsinfo=None):
func._deploy_step_argsinfo = argsinfo
return func
return decorator
+
+
+def verify_step(priority):
+ """Decorator for verify steps.
+
+ Only steps with priorities greater than 0 are used. These steps are
+ ordered by priority from highest value to lowest value.
+ For steps with the same priority, they are ordered by driver
+ interface priority (see conductor.steps.VERIFY_INTERFACE_PRIORITY).
+ execute_verify_step() will be called on each step.
+
+ Decorated verify steps must take as the only positional argument, a
+ TaskManager object.
+
+ Verify steps are synchronous and should return `None` when finished,
+ and the conductor will continue on to the next step. While the verify step
+ is executing, the node will be in `states.VERIFYING` provision state.
+
+ Examples::
+
+ class MyInterface(base.BaseInterface):
+ @base.verify_step(priority=100)
+ def example_verifying(self, task):
+ # do some verifying
+
+ :param priority: an integer (>=0) priority; used for determining the order
+ in which the step is run in the verification process.
+
+ :raises InvalidParameterValue: if any of the arguments are invalid
+ """
+ def decorator(func):
+ func._is_verify_step = True
+ if isinstance(priority, int) and priority >= 0:
+ func._verify_step_priority = priority
+ else:
+ raise exception.InvalidParameterValue(
+ _('"priority" must be an integer value >= 0, instead of "%s"')
+ % priority)
+
+ return func
+ return decorator
diff --git a/ironic/tests/unit/conductor/test_steps.py b/ironic/tests/unit/conductor/test_steps.py
index 1cba2c7c0..d96670303 100644
--- a/ironic/tests/unit/conductor/test_steps.py
+++ b/ironic/tests/unit/conductor/test_steps.py
@@ -19,6 +19,7 @@ from ironic.common import exception
from ironic.common import states
from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
+from ironic.conductor import verify as verify_steps
from ironic import objects
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@@ -1138,3 +1139,163 @@ class ValidateUserDeployStepsTestCase(db_base.DbTestCase):
result = conductor_steps._get_validated_user_deploy_steps(task)
self.assertEqual([], result)
mock_validated.assert_not_called()
+
+
+class NodeVerifyStepsTestCase(db_base.DbTestCase):
+ def setUp(self):
+ super(NodeVerifyStepsTestCase, self).setUp()
+
+ self.management_reset_idrac = {
+ 'step': 'reset_idrac', 'priority': 0,
+ 'interface': 'management'}
+ self.management_clear_job_queue = {
+ 'step': 'clear_job_queue', 'priority': 10,
+ 'interface': 'management'}
+ self.verify_steps = [self.management_clear_job_queue,
+ self.management_reset_idrac]
+ self.verify_steps_only_enabled = [self.management_clear_job_queue]
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps_priority_override_ok(self,
+ mock_verify_steps):
+ # Test verify_step_priority_override
+ cfg.CONF.set_override('verify_step_priority_override',
+ ["management.clear_job_queue:9"],
+ 'conductor')
+
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=True) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=True)
+
+ expected_step_priorities = [{'interface': 'management', 'priority': 9,
+ 'step': 'clear_job_queue'}]
+
+ self.assertEqual(expected_step_priorities, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps_priority_override_fail(self,
+ mock_verify_steps):
+ # Test verify_step_priority_override for failure
+ cfg.CONF.set_override('verify_step_priority_override',
+ ["management.clear_job_queue:9"],
+ 'conductor')
+
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=True) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=True)
+
+ expected_step_priorities = [{'interface': 'management', 'priority': 10,
+ 'step': 'clear_job_queue'}]
+
+ self.assertNotEqual(expected_step_priorities, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps_priority_override_off(self,
+ mock_verify_steps):
+ # Test disabling steps with verify_step_priority_override
+ cfg.CONF.set_override('verify_step_priority_override',
+ ["management.clear_job_queue:0"],
+ 'conductor')
+
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=True) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=True)
+
+ expected_step_priorities = []
+
+ self.assertEqual(expected_step_priorities, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps(self, mock_verify_steps):
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue,
+ self.management_reset_idrac]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=False) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=False)
+
+ self.assertEqual(self.verify_steps, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps_unsorted(self, mock_verify_steps):
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue,
+ self.management_reset_idrac]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=False) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=False,
+ sort=False)
+
+ self.assertEqual(self.verify_steps, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
+ autospec=True)
+ def test__get_verify_steps_only_enabled(self, mock_verify_steps):
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.ENROLL,
+ target_provision_state=states.MANAGEABLE)
+
+ mock_verify_steps.return_value = [self.management_clear_job_queue]
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=False) as task:
+ steps = conductor_steps._get_verify_steps(task, enabled=True)
+
+ self.assertEqual(self.verify_steps_only_enabled, steps)
+
+ @mock.patch('ironic.drivers.modules.fake.FakeManagement.'
+ 'execute_verify_step',
+ autospec=True)
+ @mock.patch.object(conductor_steps, '_get_verify_steps', autospec=True)
+ def test_execute_verify_step(self, mock_steps, mock_execute):
+ mock_steps.return_value = self.verify_steps
+
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.VERIFYING,
+ target_provision_state=states.MANAGEABLE,
+ last_error=None,
+ clean_step=None)
+
+ with task_manager.acquire(
+ self.context, node.uuid, shared=False) as task:
+ verify_steps.do_node_verify(task)
+ node.refresh()
+ self.assertTrue(mock_execute.called)
diff --git a/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml b/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml
new file mode 100644
index 000000000..8f4711db9
--- /dev/null
+++ b/releasenotes/notes/add-verify-steps-support-2b34a74e86f89cb4.yaml
@@ -0,0 +1,6 @@
+---
+features:
+ - |
+ Adds support for verify steps - a mechanism for running optional,
+ actions pre-defined in the driver while the node is in transition from
+ enroll to managable state, prior to inspection.