summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLucas Alvares Gomes <lucasagomes@gmail.com>2016-03-14 12:20:58 +0000
committerLucas Alvares Gomes <lucasagomes@gmail.com>2016-03-22 10:48:59 +0000
commit824ad1676bd8032fb4a4eb8ffc7625a376a64371 (patch)
tree08932c54407962d975e69f41a7c39697884d3321
parent0ad5b13b5a5aeffcf12dd6c37afc99857b7012dd (diff)
downloadironic-824ad1676bd8032fb4a4eb8ffc7625a376a64371.tar.gz
Agent: Out-of-band power off on deploy
The patch is adding a new property called "deploy_forces_oob_reboot" for the Agent deployment methodology. If the property is specified in the node's driver_info with the value of True, Ironic will call IPA to flush the file system and then issue a out-of-band reboot at the end of the deployment to the node to boot into the user's image. Some hardware/firmware may have problems doing a power off in-band (usually faulty hardware/bad firmware, see the bug this commit is fixing) therefore we need Ironic to do it directly in the BMC. This patch also closes one more consistency gap between the IPA deploy model and the now deprecated bash ramdisk one which used the hard reboot to finish the deployment by default. Depends-On: I5cd1d1b821426e995dc584452494b93ab23917e0 Closes-Bug: #1512492 Change-Id: I5e389c5245826f86466aedfcb9730117e24b59db
-rw-r--r--doc/source/drivers/ipa.rst20
-rw-r--r--ironic/drivers/modules/agent_base_vendor.py52
-rw-r--r--ironic/drivers/modules/agent_client.py7
-rw-r--r--ironic/drivers/modules/ilo/vendor.py3
-rw-r--r--ironic/drivers/modules/iscsi_deploy.py3
-rw-r--r--ironic/tests/unit/conductor/test_manager.py24
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_base_vendor.py58
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_client.py6
-rw-r--r--ironic/tests/unit/drivers/modules/test_pxe.py2
-rw-r--r--releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml7
10 files changed, 154 insertions, 28 deletions
diff --git a/doc/source/drivers/ipa.rst b/doc/source/drivers/ipa.rst
index aa5ecf7a0..bebbafba5 100644
--- a/doc/source/drivers/ipa.rst
+++ b/doc/source/drivers/ipa.rst
@@ -104,3 +104,23 @@ Steps to enable proxies
``image_no_proxy`` to driver_info properties in each node that will use the
proxy. Please refer to ``ironic driver-properties`` output of the
``agent_*`` driver you're using for descriptions of these properties.
+
+Advanced configuration
+======================
+
+Out-of-band Vs. in-band power off on deploy
+-------------------------------------------
+
+After deploying an image onto the node's hard disk Ironic will reboot
+the machine into the new image. By default this power action happens
+``in-band``, meaning that the ironic-conductor will instruct the IPA
+ramdisk to power itself off.
+
+Some hardware may have a problem with the default approach and
+would require Ironic to talk directly to the management controller
+to switch the power off and on again. In order to tell Ironic to do
+that you have to update the node's ``driver_info`` field and set the
+``deploy_forces_oob_reboot`` parameter with the value of **True**. For
+example, the below command sets this configuration in a specific node::
+
+ ironic node-update <UUID or name> add driver_info/deploy_forces_oob_reboot=True
diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py
index bf10d1a35..9df0cd392 100644
--- a/ironic/drivers/modules/agent_base_vendor.py
+++ b/ironic/drivers/modules/agent_base_vendor.py
@@ -22,6 +22,7 @@ import time
from oslo_config import cfg
from oslo_log import log
from oslo_utils import excutils
+from oslo_utils import strutils
from oslo_utils import timeutils
import retrying
@@ -82,6 +83,13 @@ LOG = log.getLogger(__name__)
# completing 'delete_configuration' of raid interface.
POST_CLEAN_STEP_HOOKS = {}
+VENDOR_PROPERTIES = {
+ 'deploy_forces_oob_reboot': _(
+ 'Whether Ironic should force a reboot of the Node via the out-of-band '
+ 'channel after deployment is complete. Provides compatiblity with '
+ 'older deploy ramdisks. Defaults to False. Optional.')
+}
+
def _get_client():
client = agent_client.AgentClient()
@@ -178,9 +186,7 @@ class BaseAgentVendor(base.VendorInterface):
:returns: dictionary of <property name>:<property description> entries.
"""
- # NOTE(jroll) all properties are set by the driver,
- # not by the operator.
- return {}
+ return VENDOR_PROPERTIES
def validate(self, task, method, **kwargs):
"""Validate the driver-specific Node deployment info.
@@ -688,18 +694,38 @@ class BaseAgentVendor(base.VendorInterface):
return task.driver.power.get_power_state(task)
node = task.node
+ # Whether ironic should power off the node via out-of-band or
+ # in-band methods
+ oob_power_off = strutils.bool_from_string(
+ node.driver_info.get('deploy_forces_oob_reboot', False))
try:
- try:
- self._client.power_off(node)
- _wait_until_powered_off(task)
- except Exception as e:
- LOG.warning(
- _LW('Failed to soft power off node %(node_uuid)s '
- 'in at least %(timeout)d seconds. Error: %(error)s'),
- {'node_uuid': node.uuid,
- 'timeout': (wait * (attempts - 1)) / 1000,
- 'error': e})
+ if not oob_power_off:
+ try:
+ self._client.power_off(node)
+ _wait_until_powered_off(task)
+ except Exception as e:
+ LOG.warning(
+ _LW('Failed to soft power off node %(node_uuid)s '
+ 'in at least %(timeout)d seconds. '
+ 'Error: %(error)s'),
+ {'node_uuid': node.uuid,
+ 'timeout': (wait * (attempts - 1)) / 1000,
+ 'error': e})
+ else:
+ # Flush the file system prior to hard rebooting the node
+ result = self._client.sync(node)
+ error = result.get('faultstring')
+ if error:
+ if 'Unknown command' in error:
+ error = _('The version of the IPA ramdisk used in '
+ 'the deployment do not support the '
+ 'command "sync"')
+ LOG.warning(_LW(
+ 'Failed to flush the file system prior to hard '
+ 'rebooting the node %(node)s. Error: %(error)s'),
+ {'node': node.uuid, 'error': error})
+
manager_utils.node_power_action(task, states.REBOOT)
except Exception as e:
msg = (_('Error rebooting node %(node)s after deploy. '
diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py
index 86c985e59..d27c8dbd1 100644
--- a/ironic/drivers/modules/agent_client.py
+++ b/ironic/drivers/modules/agent_client.py
@@ -174,3 +174,10 @@ class AgentClient(object):
return self._command(node=node,
method='standby.power_off',
params={})
+
+ def sync(self, node):
+ """Flush file system buffers forcing changed blocks to disk."""
+ return self._command(node=node,
+ method='standby.sync',
+ params={},
+ wait=True)
diff --git a/ironic/drivers/modules/ilo/vendor.py b/ironic/drivers/modules/ilo/vendor.py
index d991a145d..981afa599 100644
--- a/ironic/drivers/modules/ilo/vendor.py
+++ b/ironic/drivers/modules/ilo/vendor.py
@@ -57,9 +57,6 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface):
class VendorPassthru(iscsi_deploy.VendorPassthru):
"""Vendor-specific interfaces for iLO deploy drivers."""
- def get_properties(self):
- return {}
-
def validate(self, task, method, **kwargs):
"""Validate vendor-specific actions.
diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py
index 0206b738d..bc111643e 100644
--- a/ironic/drivers/modules/iscsi_deploy.py
+++ b/ironic/drivers/modules/iscsi_deploy.py
@@ -701,9 +701,6 @@ class ISCSIDeploy(base.DeployInterface):
class VendorPassthru(agent_base_vendor.BaseAgentVendor):
"""Interface to mix IPMI and PXE vendor-specific interfaces."""
- def get_properties(self):
- return {}
-
def validate(self, task, method, **kwargs):
"""Validates the inputs for a vendor passthru.
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index 48cf4c038..ffd1ec20d 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -3803,7 +3803,8 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
self._check_driver_properties("fake_ssh", expected)
def test_driver_properties_fake_pxe(self):
- expected = ['deploy_kernel', 'deploy_ramdisk']
+ expected = ['deploy_kernel', 'deploy_ramdisk',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("fake_pxe", expected)
def test_driver_properties_fake_seamicro(self):
@@ -3824,34 +3825,37 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
'ipmi_transit_address', 'ipmi_target_channel',
'ipmi_target_address', 'ipmi_local_address',
'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version',
- 'ipmi_force_boot_device'
- ]
+ 'ipmi_force_boot_device', 'deploy_forces_oob_reboot']
self._check_driver_properties("pxe_ipmitool", expected)
def test_driver_properties_pxe_ipminative(self):
expected = ['ipmi_address', 'ipmi_password', 'ipmi_username',
'deploy_kernel', 'deploy_ramdisk',
- 'ipmi_terminal_port', 'ipmi_force_boot_device']
+ 'ipmi_terminal_port', 'ipmi_force_boot_device',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("pxe_ipminative", expected)
def test_driver_properties_pxe_ssh(self):
expected = ['deploy_kernel', 'deploy_ramdisk',
'ssh_address', 'ssh_username', 'ssh_virt_type',
'ssh_key_contents', 'ssh_key_filename',
- 'ssh_password', 'ssh_port', 'ssh_terminal_port']
+ 'ssh_password', 'ssh_port', 'ssh_terminal_port',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("pxe_ssh", expected)
def test_driver_properties_pxe_seamicro(self):
expected = ['deploy_kernel', 'deploy_ramdisk',
'seamicro_api_endpoint', 'seamicro_password',
'seamicro_server_id', 'seamicro_username',
- 'seamicro_api_version', 'seamicro_terminal_port']
+ 'seamicro_api_version', 'seamicro_terminal_port',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("pxe_seamicro", expected)
def test_driver_properties_pxe_snmp(self):
expected = ['deploy_kernel', 'deploy_ramdisk',
'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version',
- 'snmp_community', 'snmp_security', 'snmp_outlet']
+ 'snmp_community', 'snmp_security', 'snmp_outlet',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("pxe_snmp", expected)
def test_driver_properties_fake_ilo(self):
@@ -3862,13 +3866,15 @@ class ManagerTestProperties(tests_db_base.DbTestCase):
def test_driver_properties_ilo_iscsi(self):
expected = ['ilo_address', 'ilo_username', 'ilo_password',
'client_port', 'client_timeout', 'ilo_deploy_iso',
- 'console_port', 'ilo_change_password']
+ 'console_port', 'ilo_change_password',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("iscsi_ilo", expected)
def test_driver_properties_agent_ilo(self):
expected = ['ilo_address', 'ilo_username', 'ilo_password',
'client_port', 'client_timeout', 'ilo_deploy_iso',
- 'console_port', 'ilo_change_password']
+ 'console_port', 'ilo_change_password',
+ 'deploy_forces_oob_reboot']
self._check_driver_properties("agent_ilo", expected)
def test_driver_properties_fail(self):
diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
index b4c1ded17..29f4ee94b 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
@@ -684,6 +684,60 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertEqual(states.DEPLOYFAIL, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
+ @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
+ @mock.patch.object(agent_client.AgentClient, 'sync',
+ spec=types.FunctionType)
+ def test_reboot_and_finish_deploy_power_action_oob_power_off(
+ self, sync_mock, node_power_action_mock):
+ # Enable force power off
+ driver_info = self.node.driver_info
+ driver_info['deploy_forces_oob_reboot'] = True
+ self.node.driver_info = driver_info
+
+ self.node.provision_state = states.DEPLOYING
+ self.node.target_provision_state = states.ACTIVE
+ self.node.save()
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=True) as task:
+ self.passthru.reboot_and_finish_deploy(task)
+
+ sync_mock.assert_called_once_with(task.node)
+ node_power_action_mock.assert_called_once_with(
+ task, states.REBOOT)
+ self.assertEqual(states.ACTIVE, task.node.provision_state)
+ self.assertEqual(states.NOSTATE, task.node.target_provision_state)
+
+ @mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True)
+ @mock.patch.object(manager_utils, 'node_power_action', autospec=True)
+ @mock.patch.object(agent_client.AgentClient, 'sync',
+ spec=types.FunctionType)
+ def test_reboot_and_finish_deploy_power_action_oob_power_off_failed(
+ self, sync_mock, node_power_action_mock, log_mock):
+ # Enable force power off
+ driver_info = self.node.driver_info
+ driver_info['deploy_forces_oob_reboot'] = True
+ self.node.driver_info = driver_info
+
+ self.node.provision_state = states.DEPLOYING
+ self.node.target_provision_state = states.ACTIVE
+ self.node.save()
+ with task_manager.acquire(self.context, self.node.uuid,
+ shared=True) as task:
+ sync_mock.return_value = {'faultstring': 'Unknown command: blah'}
+ self.passthru.reboot_and_finish_deploy(task)
+
+ sync_mock.assert_called_once_with(task.node)
+ node_power_action_mock.assert_called_once_with(
+ task, states.REBOOT)
+ self.assertEqual(states.ACTIVE, task.node.provision_state)
+ self.assertEqual(states.NOSTATE, task.node.target_provision_state)
+ log_error = ('The version of the IPA ramdisk used in the '
+ 'deployment do not support the command "sync"')
+ log_mock.assert_called_once_with(
+ 'Failed to flush the file system prior to hard rebooting the '
+ 'node %(node)s. Error: %(error)s',
+ {'node': task.node.uuid, 'error': log_error})
+
@mock.patch.object(agent_client.AgentClient, 'install_bootloader',
autospec=True)
@mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True)
@@ -1276,3 +1330,7 @@ class TestRefreshCleanSteps(TestBaseAgentVendor):
task)
client_mock.assert_called_once_with(mock.ANY, task.node,
task.ports)
+
+ def test_get_properties(self):
+ expected = agent_base_vendor.VENDOR_PROPERTIES
+ self.assertEqual(expected, self.passthru.get_properties())
diff --git a/ironic/tests/unit/drivers/modules/test_agent_client.py b/ironic/tests/unit/drivers/modules/test_agent_client.py
index fe5be2dba..aead8915a 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_client.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_client.py
@@ -236,3 +236,9 @@ class TestAgentClient(base.TestCase):
self.client.power_off(self.node)
self.client._command.assert_called_once_with(
node=self.node, method='standby.power_off', params={})
+
+ def test_sync(self):
+ self.client._command = mock.MagicMock(spec_set=[])
+ self.client.sync(self.node)
+ self.client._command.assert_called_once_with(
+ node=self.node, method='standby.sync', params={}, wait=True)
diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py
index 745626240..9e023ea18 100644
--- a/ironic/tests/unit/drivers/modules/test_pxe.py
+++ b/ironic/tests/unit/drivers/modules/test_pxe.py
@@ -32,6 +32,7 @@ from ironic.common.glance_service import base_image_service
from ironic.common import pxe_utils
from ironic.common import states
from ironic.conductor import task_manager
+from ironic.drivers.modules import agent_base_vendor
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules import pxe
from ironic.tests.unit.conductor import mgr_utils
@@ -549,6 +550,7 @@ class PXEBootTestCase(db_base.DbTestCase):
def test_get_properties(self):
expected = pxe.COMMON_PROPERTIES
+ expected.update(agent_base_vendor.VENDOR_PROPERTIES)
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
self.assertEqual(expected, task.driver.get_properties())
diff --git a/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
new file mode 100644
index 000000000..ddbbf152a
--- /dev/null
+++ b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml
@@ -0,0 +1,7 @@
+---
+fixes:
+ - Fixes a problem where some hardware/firmware (specially faulty ones)
+ won't come back online after an in-band ACPI soft power off by adding
+ a new driver property called "deploy_forces_oob_reboot" that can be set
+ to the nodes being deployed by the IPA ramdisk. If the value of this
+ property is True, Ironic will power cycle the node via out-of-band.