summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDevananda van der Veen <devananda.vdv@gmail.com>2014-09-25 16:25:57 -0700
committerLucas Alvares Gomes <lucasagomes@gmail.com>2014-09-29 10:38:01 +0100
commitf33a26fd81b1bfd57f6b3c437f10c83e5059f878 (patch)
tree397a84689b3e9d22157b2d8a5c9d6114d9abcb29
parent94dc44882567ecbd6fbf88668f8b29168bdbfce2 (diff)
downloadironic-f33a26fd81b1bfd57f6b3c437f10c83e5059f878.tar.gz
Conductor changes target_power_state before starting work
A race was discovered between when Nova requests a power state change and when that request is visible in Ironic's API, which could lead to Nova believing the request was completely satisfied before it actually started. A partial fix has been proposed to Nova: https://review.openstack.org/124162 This also requires that Ironic change the node's target_power_state from within ConductorManager *prior* to spawning the background thread which performs the actual work. Looking into this exposed another potential race in Ironic whereby a power state change request could be responded with 202-Accepted, but the background thread might fail to be started, and no error would be reported. The new behavior is to set node.last_error in the same way that this is handled for provision state changes that fail in the same way. Change-Id: I4d85c9230bcd9b6e36ffa9a326fda9908fee7e76 Closes-bug: #1310135
-rw-r--r--ironic/conductor/manager.py38
-rw-r--r--ironic/conductor/utils.py25
-rw-r--r--ironic/tests/conductor/test_manager.py2
3 files changed, 49 insertions, 16 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 2038eb909..9bc50f736 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -261,19 +261,40 @@ class ConductorManager(periodic_task.PeriodicTasks):
# instance_uuid needs to be unset, and handle it.
if 'instance_uuid' in delta:
task.driver.power.validate(task)
- node_obj['power_state'] = \
+ node_obj.power_state = \
task.driver.power.get_power_state(task)
- if node_obj['power_state'] != states.POWER_OFF:
+ if node_obj.power_state != states.POWER_OFF:
raise exception.NodeInWrongPowerState(
node=node_id,
- pstate=node_obj['power_state'])
+ pstate=node_obj.power_state)
# update any remaining parameters, then save
node_obj.save()
return node_obj
+ def _power_state_error_handler(self, e, node, power_state):
+ """Set the node's power states if error occurs.
+
+ This hook gets called upon an execption being raised when spawning
+ the worker thread to change the power state of a node.
+
+ :param e: the exception object that was raised.
+ :param node: an Ironic node object.
+ :param power_state: the power state to set on the node.
+
+ """
+ if isinstance(e, exception.NoFreeConductorWorker):
+ node.power_state = power_state
+ node.target_power_state = states.NOSTATE
+ node.last_error = (_("No free conductor workers available"))
+ node.save()
+ LOG.warning(_LW("No free conductor workers available to perform "
+ "an action on node %(node)s, setting node's "
+ "power state back to %(power_state)s.",
+ {'node': node.uuid, 'power_state': power_state}))
+
@messaging.expected_exceptions(exception.InvalidParameterValue,
exception.MissingParameterValue,
exception.NoFreeConductorWorker,
@@ -300,6 +321,17 @@ class ConductorManager(periodic_task.PeriodicTasks):
with task_manager.acquire(context, node_id, shared=False) as task:
task.driver.power.validate(task)
+ # Set the target_power_state and clear any last_error, since we're
+ # starting a new operation. This will expose to other processes
+ # and clients that work is in progress.
+ if new_state == states.REBOOT:
+ task.node.target_power_state = states.POWER_ON
+ else:
+ task.node.target_power_state = new_state
+ task.node.last_error = None
+ task.node.save()
+ task.set_spawn_error_hook(self._power_state_error_handler,
+ task.node, task.node.power_state)
task.spawn_after(self._spawn_worker, utils.node_power_action,
task, new_state)
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index c66a56292..aa8b3d312 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -45,13 +45,13 @@ def node_set_boot_device(task, device, persistent=False):
@task_manager.require_exclusive_lock
-def node_power_action(task, state):
+def node_power_action(task, new_state):
"""Change power state or reset for a node.
Perform the requested power action if the transition is required.
:param task: a TaskManager instance containing the node to act on.
- :param state: Any power state from ironic.common.states. If the
+ :param new_state: Any power state from ironic.common.states. If the
state is 'REBOOT' then a reboot will be attempted, otherwise
the node power state is directly set to 'state'.
:raises: InvalidParameterValue when the wrong state is specified
@@ -61,9 +61,9 @@ def node_power_action(task, state):
"""
node = task.node
- new_state = states.POWER_ON if state == states.REBOOT else state
+ target_state = states.POWER_ON if new_state == states.REBOOT else new_state
- if state != states.REBOOT:
+ if new_state != states.REBOOT:
try:
curr_state = task.driver.power.get_power_state(task)
except Exception as e:
@@ -96,16 +96,17 @@ def node_power_action(task, state):
LOG.warn(_LW("Driver returns ERROR power state for node %s."),
node.uuid)
- # Set the target_power_state and clear any last_error, since we're
+ # Set the target_power_state and clear any last_error, if we're
# starting a new operation. This will expose to other processes
# and clients that work is in progress.
- node['target_power_state'] = new_state
- node['last_error'] = None
- node.save()
+ if node['target_power_state'] != target_state:
+ node['target_power_state'] = target_state
+ node['last_error'] = None
+ node.save()
# take power action
try:
- if state != states.REBOOT:
+ if new_state != states.REBOOT:
task.driver.power.set_power_state(task, new_state)
else:
task.driver.power.reboot(task)
@@ -114,13 +115,13 @@ def node_power_action(task, state):
node['last_error'] = \
_("Failed to change power state to '%(target)s'. "
"Error: %(error)s") % {
- 'target': new_state, 'error': e}
+ 'target': target_state, 'error': e}
else:
# success!
- node['power_state'] = new_state
+ node['power_state'] = target_state
LOG.info(_LI('Succesfully set node %(node)s power state to '
'%(state)s.'),
- {'node': node.uuid, 'state': new_state})
+ {'node': node.uuid, 'state': target_state})
finally:
node['target_power_state'] = states.NOSTATE
node.save()
diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py
index d953c9e43..f25954318 100644
--- a/ironic/tests/conductor/test_manager.py
+++ b/ironic/tests/conductor/test_manager.py
@@ -315,7 +315,7 @@ class ChangeNodePowerStateTestCase(_ServiceSetUpMixin,
node.refresh()
self.assertEqual(initial_state, node.power_state)
self.assertIsNone(node.target_power_state)
- self.assertIsNone(node.last_error)
+ self.assertIsNotNone(node.last_error)
# Verify the picked reservation has been cleared due to full pool.
self.assertIsNone(node.reservation)