summaryrefslogtreecommitdiff
path: root/ironic
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2020-04-23 14:17:39 +0000
committerGerrit Code Review <review@openstack.org>2020-04-23 14:17:39 +0000
commit04649ce9555769e0288de1ab720c8acff475182c (patch)
tree8abed29821cd62dd82043ef686490a3474087f4f /ironic
parent687da97255364c9b41656d1cf353e3ce53304d25 (diff)
parentff2030ce98dbe470f8d51d308e184cc065d491e0 (diff)
downloadironic-04649ce9555769e0288de1ab720c8acff475182c.tar.gz
Merge "Improve the command status checks in the agent's process_next_step"
Diffstat (limited to 'ironic')
-rw-r--r--ironic/conductor/steps.py6
-rw-r--r--ironic/drivers/modules/agent_base.py52
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_base.py15
3 files changed, 51 insertions, 22 deletions
diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py
index e67f6e3bb..3fb539e34 100644
--- a/ironic/conductor/steps.py
+++ b/ironic/conductor/steps.py
@@ -81,6 +81,12 @@ def _sorted_steps(steps, sort_step_key):
return sorted(steps, key=sort_step_key, reverse=True)
+def is_equivalent(step1, step2):
+ """Compare steps, ignoring their priority."""
+ return (step1.get('interface') == step2.get('interface')
+ and step1.get('step') == step2.get('step'))
+
+
def _get_steps(task, interfaces, get_method, enabled=False,
sort_step_key=None):
"""Get steps for task.node.
diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py
index be7a7dd73..6eb2312b4 100644
--- a/ironic/drivers/modules/agent_base.py
+++ b/ironic/drivers/modules/agent_base.py
@@ -29,6 +29,7 @@ from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import image_service
from ironic.common import states
+from ironic.common import utils
from ironic.conductor import steps as conductor_steps
from ironic.conductor import utils as manager_utils
from ironic.conf import CONF
@@ -213,6 +214,20 @@ def _post_step_reboot(task, step_type):
task.node.save()
+def _freshly_booted(commands, step_type):
+ """Check if the ramdisk has just started.
+
+ On the very first boot we fetch the available steps, hence the only command
+ agent executed will be get_XXX_steps. For later reboots the list of
+ commands will be empty.
+ """
+ return (
+ not commands
+ or (len(commands) == 1
+ and commands[0]['command_name'] == 'get_%s_steps' % step_type)
+ )
+
+
def _get_completed_command(task, commands, step_type):
"""Returns None or a completed clean/deploy command from the agent.
@@ -220,8 +235,7 @@ def _get_completed_command(task, commands, step_type):
:param commands: a set of command results from the agent, typically
fetched with agent_client.get_commands_status().
"""
- if not commands:
- return
+ assert commands, 'BUG: _get_completed_command called with no commands'
last_command = commands[-1]
@@ -229,8 +243,8 @@ def _get_completed_command(task, commands, step_type):
# catches race condition where execute_step is still
# processing so the command hasn't started yet
LOG.debug('Expected agent last command to be "execute_%(type)s_step" '
- 'for node %(node)s, instead got "%(command)s". Waiting '
- 'for next heartbeat.',
+ 'for node %(node)s, instead got "%(command)s". An out-of-'
+ 'band step may be running. Waiting for next heartbeat.',
{'node': task.node.uuid,
'command': last_command['command_name'],
'type': step_type})
@@ -246,11 +260,14 @@ def _get_completed_command(task, commands, step_type):
'type': step_type.capitalize()})
return
elif (last_command['command_status'] == 'SUCCEEDED'
- and last_step != current_step):
+ and (not last_step
+ or not conductor_steps.is_equivalent(last_step, current_step))):
# A previous step was running, the new command has not yet started.
- LOG.debug('%(type)s step not yet started for node %(node)s: %(step)s',
- {'step': last_step, 'node': task.node.uuid,
- 'type': step_type.capitalize()})
+ LOG.debug('%(type)s step %(step)s is not currently running for node '
+ '%(node)s. Not yet started or an out-of-band step is in '
+ 'progress. The last finished step is %(previous)s.',
+ {'step': current_step, 'node': task.node.uuid,
+ 'type': step_type.capitalize(), 'previous': last_step})
return
else:
return last_command
@@ -807,22 +824,13 @@ class AgentDeployMixin(HeartbeatMixin):
node = task.node
agent_commands = self._client.get_commands_status(task.node)
- if not agent_commands:
+ if _freshly_booted(agent_commands, step_type):
field = ('cleaning_reboot' if step_type == 'clean'
else 'deployment_reboot')
- if task.node.driver_internal_info.get(field):
- # Node finished a cleaning step that requested a reboot, and
- # this is the first heartbeat after booting. Continue cleaning.
- info = task.node.driver_internal_info
- info.pop(field, None)
- task.node.driver_internal_info = info
- task.node.save()
- manager_utils.notify_conductor_resume_operation(task,
- step_type)
- return
- else:
- # Agent has no commands whatsoever
- return
+ utils.pop_node_nested_field(
+ task.node, 'driver_internal_info', field)
+ manager_utils.notify_conductor_resume_operation(task, step_type)
+ return
current_step = (node.clean_step if step_type == 'clean'
else node.deploy_step)
diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py
index 30a0cb9dd..c4f4f722c 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_base.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_base.py
@@ -1806,6 +1806,21 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
self.deploy.continue_cleaning(task)
self.assertFalse(notify_mock.called)
+ @mock.patch.object(manager_utils, 'notify_conductor_resume_operation',
+ autospec=True)
+ @mock.patch.object(agent_client.AgentClient, 'get_commands_status',
+ autospec=True)
+ def test_continue_cleaning_no_step_running(self, status_mock, notify_mock):
+ status_mock.return_value = [{
+ 'command_status': 'SUCCEEDED',
+ 'command_name': 'get_clean_steps',
+ 'command_result': []
+ }]
+ with task_manager.acquire(self.context, self.node['uuid'],
+ shared=False) as task:
+ self.deploy.continue_cleaning(task)
+ notify_mock.assert_called_once_with(task, 'clean')
+
@mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)