diff options
author | Devananda van der Veen <devananda.vdv@gmail.com> | 2014-09-25 16:25:57 -0700 |
---|---|---|
committer | Lucas Alvares Gomes <lucasagomes@gmail.com> | 2014-09-29 10:38:01 +0100 |
commit | f33a26fd81b1bfd57f6b3c437f10c83e5059f878 (patch) | |
tree | 397a84689b3e9d22157b2d8a5c9d6114d9abcb29 | |
parent | 94dc44882567ecbd6fbf88668f8b29168bdbfce2 (diff) | |
download | ironic-f33a26fd81b1bfd57f6b3c437f10c83e5059f878.tar.gz |
Conductor changes target_power_state before starting work
A race was discovered between when Nova requests a power state change
and when that request is visible in Ironic's API, which could lead to
Nova believing the request was completely satisfied before it
actually started.
A partial fix has been proposed to Nova:
https://review.openstack.org/124162
This also requires that Ironic change the node's target_power_state from
within ConductorManager *prior* to spawning the background thread which
performs the actual work.
Looking into this exposed another potential race in Ironic whereby
a power state change request could be responded with 202-Accepted,
but the background thread might fail to be started, and no error would
be reported. The new behavior is to set node.last_error in the same
way that this is handled for provision state changes that fail in the
same way.
Change-Id: I4d85c9230bcd9b6e36ffa9a326fda9908fee7e76
Closes-bug: #1310135
-rw-r--r-- | ironic/conductor/manager.py | 38 | ||||
-rw-r--r-- | ironic/conductor/utils.py | 25 | ||||
-rw-r--r-- | ironic/tests/conductor/test_manager.py | 2 |
3 files changed, 49 insertions, 16 deletions
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 2038eb909..9bc50f736 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -261,19 +261,40 @@ class ConductorManager(periodic_task.PeriodicTasks): # instance_uuid needs to be unset, and handle it. if 'instance_uuid' in delta: task.driver.power.validate(task) - node_obj['power_state'] = \ + node_obj.power_state = \ task.driver.power.get_power_state(task) - if node_obj['power_state'] != states.POWER_OFF: + if node_obj.power_state != states.POWER_OFF: raise exception.NodeInWrongPowerState( node=node_id, - pstate=node_obj['power_state']) + pstate=node_obj.power_state) # update any remaining parameters, then save node_obj.save() return node_obj + def _power_state_error_handler(self, e, node, power_state): + """Set the node's power states if error occurs. + + This hook gets called upon an execption being raised when spawning + the worker thread to change the power state of a node. + + :param e: the exception object that was raised. + :param node: an Ironic node object. + :param power_state: the power state to set on the node. + + """ + if isinstance(e, exception.NoFreeConductorWorker): + node.power_state = power_state + node.target_power_state = states.NOSTATE + node.last_error = (_("No free conductor workers available")) + node.save() + LOG.warning(_LW("No free conductor workers available to perform " + "an action on node %(node)s, setting node's " + "power state back to %(power_state)s.", + {'node': node.uuid, 'power_state': power_state})) + @messaging.expected_exceptions(exception.InvalidParameterValue, exception.MissingParameterValue, exception.NoFreeConductorWorker, @@ -300,6 +321,17 @@ class ConductorManager(periodic_task.PeriodicTasks): with task_manager.acquire(context, node_id, shared=False) as task: task.driver.power.validate(task) + # Set the target_power_state and clear any last_error, since we're + # starting a new operation. This will expose to other processes + # and clients that work is in progress. + if new_state == states.REBOOT: + task.node.target_power_state = states.POWER_ON + else: + task.node.target_power_state = new_state + task.node.last_error = None + task.node.save() + task.set_spawn_error_hook(self._power_state_error_handler, + task.node, task.node.power_state) task.spawn_after(self._spawn_worker, utils.node_power_action, task, new_state) diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index c66a56292..aa8b3d312 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -45,13 +45,13 @@ def node_set_boot_device(task, device, persistent=False): @task_manager.require_exclusive_lock -def node_power_action(task, state): +def node_power_action(task, new_state): """Change power state or reset for a node. Perform the requested power action if the transition is required. :param task: a TaskManager instance containing the node to act on. - :param state: Any power state from ironic.common.states. If the + :param new_state: Any power state from ironic.common.states. If the state is 'REBOOT' then a reboot will be attempted, otherwise the node power state is directly set to 'state'. :raises: InvalidParameterValue when the wrong state is specified @@ -61,9 +61,9 @@ def node_power_action(task, state): """ node = task.node - new_state = states.POWER_ON if state == states.REBOOT else state + target_state = states.POWER_ON if new_state == states.REBOOT else new_state - if state != states.REBOOT: + if new_state != states.REBOOT: try: curr_state = task.driver.power.get_power_state(task) except Exception as e: @@ -96,16 +96,17 @@ def node_power_action(task, state): LOG.warn(_LW("Driver returns ERROR power state for node %s."), node.uuid) - # Set the target_power_state and clear any last_error, since we're + # Set the target_power_state and clear any last_error, if we're # starting a new operation. This will expose to other processes # and clients that work is in progress. - node['target_power_state'] = new_state - node['last_error'] = None - node.save() + if node['target_power_state'] != target_state: + node['target_power_state'] = target_state + node['last_error'] = None + node.save() # take power action try: - if state != states.REBOOT: + if new_state != states.REBOOT: task.driver.power.set_power_state(task, new_state) else: task.driver.power.reboot(task) @@ -114,13 +115,13 @@ def node_power_action(task, state): node['last_error'] = \ _("Failed to change power state to '%(target)s'. " "Error: %(error)s") % { - 'target': new_state, 'error': e} + 'target': target_state, 'error': e} else: # success! - node['power_state'] = new_state + node['power_state'] = target_state LOG.info(_LI('Succesfully set node %(node)s power state to ' '%(state)s.'), - {'node': node.uuid, 'state': new_state}) + {'node': node.uuid, 'state': target_state}) finally: node['target_power_state'] = states.NOSTATE node.save() diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py index d953c9e43..f25954318 100644 --- a/ironic/tests/conductor/test_manager.py +++ b/ironic/tests/conductor/test_manager.py @@ -315,7 +315,7 @@ class ChangeNodePowerStateTestCase(_ServiceSetUpMixin, node.refresh() self.assertEqual(initial_state, node.power_state) self.assertIsNone(node.target_power_state) - self.assertIsNone(node.last_error) + self.assertIsNotNone(node.last_error) # Verify the picked reservation has been cleared due to full pool. self.assertIsNone(node.reservation) |