summary | refs | log | tree | commit | diff
path: root/ironic/drivers/modules/agent_power.py
diff options
context:
space:
mode:
author: Dmitry Tantsur <dtantsur@protonmail.com> 2020-06-15 15:30:39 +0200
committer: Dmitry Tantsur <dtantsur@protonmail.com> 2020-07-20 09:42:05 +0200
commit: 46f8c857524e8a45935836ac97463928a2d9a0a9 (patch)
tree: c6a758f5e9e2405dfbe071b0d849cbf85bac9450 /ironic/drivers/modules/agent_power.py
parent: e804f6c56bbfc9c71dda9096710ad6288b5d618a (diff)
download: ironic-46f8c857524e8a45935836ac97463928a2d9a0a9.tar.gz
Add agent power interface
This change adds a new 'agent' power interface that can be used together with fast-track to deploy nodes without knowing their power credentials. It relies on the agent staying powered on during the whole pre-deployment and deployment process. Story: #2007771 Task: #39995 Change-Id: I3d7157c1c4464b650adebbd7f894ee33d0f8f25b
Diffstat (limited to 'ironic/drivers/modules/agent_power.py')
-rw-r--r-- ironic/drivers/modules/agent_power.py 220
1 files changed, 220 insertions, 0 deletions
diff --git a/ironic/drivers/modules/agent_power.py b/ironic/drivers/modules/agent_power.py
new file mode 100644
index 000000000..11ef5711a
--- /dev/null
+++ b/ironic/drivers/modules/agent_power.py
@@ -0,0 +1,220 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+The agent power interface.
+"""
+
+import time
+
+from oslo_config import cfg
+from oslo_log import log
+import retrying
+
+from ironic.common import exception
+from ironic.common.i18n import _
+from ironic.common import states
+from ironic.conductor import utils as cond_utils
+from ironic.drivers import base
+from ironic.drivers.modules import agent_client
+
+
+# Global oslo.config object; this module reads options from the [deploy]
+# and [agent] groups.
+CONF = cfg.CONF
+
+# Module-level logger named after this module.
+LOG = log.getLogger(__name__)
+
+# Seconds to sleep after a reboot has been requested, giving the node time
+# to power down before the agent is polled again.
+_POWER_WAIT = 30
+
+
+class AgentPower(base.PowerInterface):
+    """Power interface using the running agent for power actions.
+
+    Only reboot requests are implemented; they are sent to the agent through
+    ``agent_client.AgentClient``. The interface refuses to load or to
+    validate unless ``[deploy]fast_track`` is enabled.
+    """
+
+    def __init__(self):
+        """Create the interface and its agent client.
+
+        :raises: InvalidParameterValue if ``[deploy]fast_track`` is not
+            enabled.
+        """
+        super(AgentPower, self).__init__()
+        if not CONF.deploy.fast_track:
+            raise exception.InvalidParameterValue(
+                _('[deploy]fast_track must be True to enable the agent '
+                  'power interface'))
+        self._client = agent_client.AgentClient()
+
+    def get_properties(self):
+        """Return the properties of the interface.
+
+        :returns: dictionary of <property name>:<property description>
+            entries. Always empty for this interface.
+        """
+        return {}
+
+    def validate(self, task):
+        """Validate that the agent power interface can be used for the node.
+
+        :param task: A TaskManager instance containing the node to act on.
+        :raises: InvalidParameterValue if ``[deploy]fast_track`` is disabled
+            or if the agent is not considered alive for the node.
+        """
+        # NOTE(dtantsur): the fast_track option is mutable, so we have to
+        # check it again on validation.
+        if not CONF.deploy.fast_track:
+            raise exception.InvalidParameterValue(
+                _('[deploy]fast_track must be True to enable the agent '
+                  'power interface'))
+        # TODO(dtantsur): support ACTIVE nodes
+        if not cond_utils.agent_is_alive(task.node):
+            raise exception.InvalidParameterValue(
+                _('Agent seems offline for node %s, the agent power interface '
+                  'cannot be used') % task.node.uuid)
+
+    def supports_power_sync(self, task):
+        """Check if power sync is supported for the given node.
+
+        Not supported for the agent power since it is not possible to power
+        on/off nodes.
+
+        :param task: A TaskManager instance containing the node to act on
+            with a **shared** lock.
+        :returns: boolean, whether power sync is supported. Always False.
+        """
+        return False
+
+    def get_supported_power_states(self, task):
+        """Get a list of the supported power states.
+
+        Only contains REBOOT and SOFT_REBOOT.
+
+        :param task: A TaskManager instance containing the node to act on.
+        :returns: A list with the supported power states defined
+            in :mod:`ironic.common.states`.
+        """
+        return [states.REBOOT, states.SOFT_REBOOT]
+
+    def get_power_state(self, task):
+        """Return the power state of the task's node.
+
+        Essentially, the only known state is POWER ON, everything else is
+        an error (or more precisely ``None``).
+
+        :param task: A TaskManager instance containing the node to act on.
+        :returns: ``states.POWER_ON`` if the agent is alive, otherwise
+            ``None`` (an error is logged in that case).
+        """
+        # TODO(dtantsur): support ACTIVE nodes
+        if cond_utils.agent_is_alive(task.node):
+            return states.POWER_ON
+        else:
+            LOG.error('Node %s is not fast-track-able, cannot determine '
+                      'its power state via the "agent" power interface',
+                      task.node.uuid)
+            return None
+
+    def set_power_state(self, task, power_state, timeout=None):
+        """Set the power state of the task's node.
+
+        :param task: A TaskManager instance containing the node to act on.
+        :param power_state: Power state from :mod:`ironic.common.states`.
+            Only REBOOT and SOFT_REBOOT are supported and are synonymous.
+        :param timeout: timeout (in seconds) positive integer (> 0) for any
+            power state. ``None`` indicates to use default timeout.
+        :raises: PowerStateFailure on non-supported power state.
+        """
+        if power_state in (states.REBOOT, states.SOFT_REBOOT):
+            # Forward the caller's timeout rather than silently dropping it.
+            return self.reboot(task, timeout)
+        else:
+            LOG.error('Power state %(state)s is not implemented for node '
+                      '%(node)s using the "agent" power interface',
+                      {'node': task.node.uuid, 'state': power_state})
+            raise exception.PowerStateFailure(pstate=power_state)
+
+    def reboot(self, task, timeout=None):
+        """Perform a reboot of the task's node.
+
+        Only soft reboot is implemented. The reboot is requested from the
+        agent, the stored agent token/URL are wiped, and then the method
+        blocks until the agent is seen again or the timeout expires.
+
+        :param task: A TaskManager instance containing the node to act on.
+        :param timeout: timeout (in seconds) positive integer (> 0) for any
+            power state. ``None`` indicates to use default timeout.
+        :raises: PowerStateFailure if waiting for the agent fails.
+        """
+        node = task.node
+
+        self._client.reboot(node)
+
+        info = node.driver_internal_info
+        # NOTE(dtantsur): wipe the agent token, otherwise the rebooted agent
+        # won't be able to heartbeat. This is mostly a precaution since the
+        # calling code in conductor is expected to handle it.
+        if not info.get('agent_secret_token_pregenerated'):
+            info.pop('agent_secret_token', None)
+        # NOTE(dtantsur): the URL may change on reboot, wipe it as well (but
+        # only after we call reboot).
+        info.pop('agent_url', None)
+        node.driver_internal_info = info
+        node.save()
+
+        LOG.debug('Requested reboot of node %(node)s via the agent, waiting '
+                  '%(wait)d seconds for the node to power down',
+                  {'node': task.node.uuid, 'wait': _POWER_WAIT})
+        time.sleep(_POWER_WAIT)
+
+        if (node.provision_state in (states.DEPLOYING, states.CLEANING)
+                and (node.driver_internal_info.get('deployment_reboot')
+                     or node.driver_internal_info.get('cleaning_reboot'))):
+            # NOTE(dtantsur): we need to downgrade the lock otherwise
+            # heartbeats won't be processed. It should not have side effects
+            # for nodes in DEPLOYING/CLEANING.
+            task.downgrade_lock()
+
+        try:
+            self._wait_for_reboot(task, timeout)
+        finally:
+            # The caller probably expects a lock, so re-acquire it
+            task.upgrade_lock()
+
+    def _wait_for_reboot(self, task, timeout):
+        """Poll the agent until the reboot is seen as finished.
+
+        :param task: A TaskManager instance containing the node to act on.
+        :param timeout: maximum time to wait, in seconds. A falsy value
+            selects ``[agent]post_deploy_get_power_state_retries`` times
+            ``[agent]post_deploy_get_power_state_retry_interval``.
+        :raises: PowerStateFailure if the agent cannot be reached before
+            the timeout expires or if any other error occurs while waiting.
+        """
+        wait = CONF.agent.post_deploy_get_power_state_retry_interval
+        if not timeout:
+            timeout = CONF.agent.post_deploy_get_power_state_retries * wait
+
+        # NOTE: retrying takes both stop_max_delay and wait_fixed in
+        # milliseconds, while timeout and wait are expressed in seconds.
+        @retrying.retry(
+            stop_max_delay=timeout * 1000,
+            retry_on_result=lambda result: not result,
+            retry_on_exception=(
+                lambda e: isinstance(e, exception.AgentConnectionFailed)),
+            wait_fixed=wait * 1000
+        )
+        def _wait_until_rebooted(task):
+            try:
+                status = self._client.get_commands_status(
+                    task.node, retry_connection=False, expect_errors=True)
+            except exception.AgentConnectionFailed:
+                LOG.debug('Still waiting for the agent to come back on the '
+                          'node %s', task.node.uuid)
+                raise
+
+            # A reboot command still present in the reported commands is
+            # taken to mean that the node has not powered off yet.
+            if any(cmd['command_name'] == agent_client.REBOOT_COMMAND
+                   for cmd in status):
+                LOG.debug('Still waiting for the agent to power off on the '
+                          'node %s', task.node.uuid)
+                return False
+
+            return True
+
+        try:
+            _wait_until_rebooted(task)
+        except exception.AgentConnectionFailed as exc:
+            msg = _('Agent failed to come back on %(node)s with the "agent" '
+                    'power interface: %(exc)s') % {
+                        'node': task.node.uuid, 'exc': exc}
+            LOG.error(msg)
+            raise exception.PowerStateFailure(msg)
+        except Exception as exc:
+            LOG.error('Could not reboot node %(node)s with the "agent" power '
+                      'interface: %(exc)s',
+                      {'node': task.node.uuid, 'exc': exc})
+            raise exception.PowerStateFailure(
+                _('Unexpected error when rebooting through the agent: %s')
+                % exc)