diff options
author | Zuul <zuul@review.opendev.org> | 2020-04-23 14:17:39 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2020-04-23 14:17:39 +0000 |
commit | 04649ce9555769e0288de1ab720c8acff475182c (patch) | |
tree | 8abed29821cd62dd82043ef686490a3474087f4f /ironic | |
parent | 687da97255364c9b41656d1cf353e3ce53304d25 (diff) | |
parent | ff2030ce98dbe470f8d51d308e184cc065d491e0 (diff) | |
download | ironic-04649ce9555769e0288de1ab720c8acff475182c.tar.gz |
Merge "Improve the command status checks in the agent's process_next_step"
Diffstat (limited to 'ironic')
-rw-r--r-- | ironic/conductor/steps.py | 6 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_base.py | 52 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent_base.py | 15 |
3 files changed, 51 insertions, 22 deletions
diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py index e67f6e3bb..3fb539e34 100644 --- a/ironic/conductor/steps.py +++ b/ironic/conductor/steps.py @@ -81,6 +81,12 @@ def _sorted_steps(steps, sort_step_key): return sorted(steps, key=sort_step_key, reverse=True) +def is_equivalent(step1, step2): + """Compare steps, ignoring their priority.""" + return (step1.get('interface') == step2.get('interface') + and step1.get('step') == step2.get('step')) + + def _get_steps(task, interfaces, get_method, enabled=False, sort_step_key=None): """Get steps for task.node. diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index be7a7dd73..6eb2312b4 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -29,6 +29,7 @@ from ironic.common import exception from ironic.common.i18n import _ from ironic.common import image_service from ironic.common import states +from ironic.common import utils from ironic.conductor import steps as conductor_steps from ironic.conductor import utils as manager_utils from ironic.conf import CONF @@ -213,6 +214,20 @@ def _post_step_reboot(task, step_type): task.node.save() +def _freshly_booted(commands, step_type): + """Check if the ramdisk has just started. + + On the very first boot we fetch the available steps, hence the only command + agent executed will be get_XXX_steps. For later reboots the list of + commands will be empty. + """ + return ( + not commands + or (len(commands) == 1 + and commands[0]['command_name'] == 'get_%s_steps' % step_type) + ) + + def _get_completed_command(task, commands, step_type): """Returns None or a completed clean/deploy command from the agent. @@ -220,8 +235,7 @@ def _get_completed_command(task, commands, step_type): :param commands: a set of command results from the agent, typically fetched with agent_client.get_commands_status(). """ - if not commands: - return + assert commands, 'BUG: _get_completed_command called with no commands' last_command = commands[-1] @@ -229,8 +243,8 @@ def _get_completed_command(task, commands, step_type): # catches race condition where execute_step is still # processing so the command hasn't started yet LOG.debug('Expected agent last command to be "execute_%(type)s_step" ' - 'for node %(node)s, instead got "%(command)s". Waiting ' - 'for next heartbeat.', + 'for node %(node)s, instead got "%(command)s". An out-of-' + 'band step may be running. Waiting for next heartbeat.', {'node': task.node.uuid, 'command': last_command['command_name'], 'type': step_type}) @@ -246,11 +260,14 @@ def _get_completed_command(task, commands, step_type): 'type': step_type.capitalize()}) return elif (last_command['command_status'] == 'SUCCEEDED' - and last_step != current_step): + and (not last_step + or not conductor_steps.is_equivalent(last_step, current_step))): # A previous step was running, the new command has not yet started. - LOG.debug('%(type)s step not yet started for node %(node)s: %(step)s', - {'step': last_step, 'node': task.node.uuid, - 'type': step_type.capitalize()}) + LOG.debug('%(type)s step %(step)s is not currently running for node ' + '%(node)s. Not yet started or an out-of-band step is in ' + 'progress. The last finished step is %(previous)s.', + {'step': current_step, 'node': task.node.uuid, + 'type': step_type.capitalize(), 'previous': last_step}) return else: return last_command @@ -807,22 +824,13 @@ class AgentDeployMixin(HeartbeatMixin): node = task.node agent_commands = self._client.get_commands_status(task.node) - if not agent_commands: + if _freshly_booted(agent_commands, step_type): field = ('cleaning_reboot' if step_type == 'clean' else 'deployment_reboot') - if task.node.driver_internal_info.get(field): - # Node finished a cleaning step that requested a reboot, and - # this is the first heartbeat after booting. Continue cleaning. - info = task.node.driver_internal_info - info.pop(field, None) - task.node.driver_internal_info = info - task.node.save() - manager_utils.notify_conductor_resume_operation(task, - step_type) - return - else: - # Agent has no commands whatsoever - return + utils.pop_node_nested_field( + task.node, 'driver_internal_info', field) + manager_utils.notify_conductor_resume_operation(task, step_type) + return current_step = (node.clean_step if step_type == 'clean' else node.deploy_step) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 30a0cb9dd..c4f4f722c 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -1806,6 +1806,21 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) self.assertFalse(notify_mock.called) + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', + autospec=True) + @mock.patch.object(agent_client.AgentClient, 'get_commands_status', + autospec=True) + def test_continue_cleaning_no_step_running(self, status_mock, notify_mock): + status_mock.return_value = [{ + 'command_status': 'SUCCEEDED', + 'command_name': 'get_clean_steps', + 'command_result': [] + }] + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.deploy.continue_cleaning(task) + notify_mock.assert_called_once_with(task, 'clean') + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) |