diff options
author | Lucas Alvares Gomes <lucasagomes@gmail.com> | 2016-03-14 12:20:58 +0000 |
---|---|---|
committer | Lucas Alvares Gomes <lucasagomes@gmail.com> | 2016-03-22 10:48:59 +0000 |
commit | 824ad1676bd8032fb4a4eb8ffc7625a376a64371 (patch) | |
tree | 08932c54407962d975e69f41a7c39697884d3321 | |
parent | 0ad5b13b5a5aeffcf12dd6c37afc99857b7012dd (diff) | |
download | ironic-824ad1676bd8032fb4a4eb8ffc7625a376a64371.tar.gz |
Agent: Out-of-band power off on deploy
The patch is adding a new property called "deploy_forces_oob_reboot"
for the Agent deployment methodology. If the property is specified in
the node's driver_info with the value of True, Ironic will call IPA to
flush the file system and then issue a out-of-band reboot at the end of
the deployment to the node to boot into the user's image.
Some hardware/firmware may have problems doing a power off in-band
(usually faulty hardware/bad firmware, see the bug this commit is fixing)
therefore we need Ironic to do it directly in the BMC.
This patch also closes one more consistency gap between the IPA deploy
model and the now deprecated bash ramdisk one which used the hard reboot
to finish the deployment by default.
Depends-On: I5cd1d1b821426e995dc584452494b93ab23917e0
Closes-Bug: #1512492
Change-Id: I5e389c5245826f86466aedfcb9730117e24b59db
-rw-r--r-- | doc/source/drivers/ipa.rst | 20 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_base_vendor.py | 52 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_client.py | 7 | ||||
-rw-r--r-- | ironic/drivers/modules/ilo/vendor.py | 3 | ||||
-rw-r--r-- | ironic/drivers/modules/iscsi_deploy.py | 3 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_manager.py | 24 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent_base_vendor.py | 58 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent_client.py | 6 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_pxe.py | 2 | ||||
-rw-r--r-- | releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml | 7 |
10 files changed, 154 insertions, 28 deletions
diff --git a/doc/source/drivers/ipa.rst b/doc/source/drivers/ipa.rst index aa5ecf7a0..bebbafba5 100644 --- a/doc/source/drivers/ipa.rst +++ b/doc/source/drivers/ipa.rst @@ -104,3 +104,23 @@ Steps to enable proxies ``image_no_proxy`` to driver_info properties in each node that will use the proxy. Please refer to ``ironic driver-properties`` output of the ``agent_*`` driver you're using for descriptions of these properties. + +Advanced configuration +====================== + +Out-of-band Vs. in-band power off on deploy +------------------------------------------- + +After deploying an image onto the node's hard disk Ironic will reboot +the machine into the new image. By default this power action happens +``in-band``, meaning that the ironic-conductor will instruct the IPA +ramdisk to power itself off. + +Some hardware may have a problem with the default approach and +would require Ironic to talk directly to the management controller +to switch the power off and on again. In order to tell Ironic to do +that you have to update the node's ``driver_info`` field and set the +``deploy_forces_oob_reboot`` parameter with the value of **True**. For +example, the below command sets this configuration in a specific node:: + + ironic node-update <UUID or name> add driver_info/deploy_forces_oob_reboot=True diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index bf10d1a35..9df0cd392 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -22,6 +22,7 @@ import time from oslo_config import cfg from oslo_log import log from oslo_utils import excutils +from oslo_utils import strutils from oslo_utils import timeutils import retrying @@ -82,6 +83,13 @@ LOG = log.getLogger(__name__) # completing 'delete_configuration' of raid interface. POST_CLEAN_STEP_HOOKS = {} +VENDOR_PROPERTIES = { + 'deploy_forces_oob_reboot': _( + 'Whether Ironic should force a reboot of the Node via the out-of-band ' + 'channel after deployment is complete. Provides compatiblity with ' + 'older deploy ramdisks. Defaults to False. Optional.') +} + def _get_client(): client = agent_client.AgentClient() @@ -178,9 +186,7 @@ class BaseAgentVendor(base.VendorInterface): :returns: dictionary of <property name>:<property description> entries. """ - # NOTE(jroll) all properties are set by the driver, - # not by the operator. - return {} + return VENDOR_PROPERTIES def validate(self, task, method, **kwargs): """Validate the driver-specific Node deployment info. @@ -688,18 +694,38 @@ class BaseAgentVendor(base.VendorInterface): return task.driver.power.get_power_state(task) node = task.node + # Whether ironic should power off the node via out-of-band or + # in-band methods + oob_power_off = strutils.bool_from_string( + node.driver_info.get('deploy_forces_oob_reboot', False)) try: - try: - self._client.power_off(node) - _wait_until_powered_off(task) - except Exception as e: - LOG.warning( - _LW('Failed to soft power off node %(node_uuid)s ' - 'in at least %(timeout)d seconds. Error: %(error)s'), - {'node_uuid': node.uuid, - 'timeout': (wait * (attempts - 1)) / 1000, - 'error': e}) + if not oob_power_off: + try: + self._client.power_off(node) + _wait_until_powered_off(task) + except Exception as e: + LOG.warning( + _LW('Failed to soft power off node %(node_uuid)s ' + 'in at least %(timeout)d seconds. ' + 'Error: %(error)s'), + {'node_uuid': node.uuid, + 'timeout': (wait * (attempts - 1)) / 1000, + 'error': e}) + else: + # Flush the file system prior to hard rebooting the node + result = self._client.sync(node) + error = result.get('faultstring') + if error: + if 'Unknown command' in error: + error = _('The version of the IPA ramdisk used in ' + 'the deployment do not support the ' + 'command "sync"') + LOG.warning(_LW( + 'Failed to flush the file system prior to hard ' + 'rebooting the node %(node)s. Error: %(error)s'), + {'node': node.uuid, 'error': error}) + manager_utils.node_power_action(task, states.REBOOT) except Exception as e: msg = (_('Error rebooting node %(node)s after deploy. ' diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 86c985e59..d27c8dbd1 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -174,3 +174,10 @@ class AgentClient(object): return self._command(node=node, method='standby.power_off', params={}) + + def sync(self, node): + """Flush file system buffers forcing changed blocks to disk.""" + return self._command(node=node, + method='standby.sync', + params={}, + wait=True) diff --git a/ironic/drivers/modules/ilo/vendor.py b/ironic/drivers/modules/ilo/vendor.py index d991a145d..981afa599 100644 --- a/ironic/drivers/modules/ilo/vendor.py +++ b/ironic/drivers/modules/ilo/vendor.py @@ -57,9 +57,6 @@ class IloVirtualMediaAgentVendorInterface(agent.AgentVendorInterface): class VendorPassthru(iscsi_deploy.VendorPassthru): """Vendor-specific interfaces for iLO deploy drivers.""" - def get_properties(self): - return {} - def validate(self, task, method, **kwargs): """Validate vendor-specific actions. diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py index 0206b738d..bc111643e 100644 --- a/ironic/drivers/modules/iscsi_deploy.py +++ b/ironic/drivers/modules/iscsi_deploy.py @@ -701,9 +701,6 @@ class ISCSIDeploy(base.DeployInterface): class VendorPassthru(agent_base_vendor.BaseAgentVendor): """Interface to mix IPMI and PXE vendor-specific interfaces.""" - def get_properties(self): - return {} - def validate(self, task, method, **kwargs): """Validates the inputs for a vendor passthru. diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index 48cf4c038..ffd1ec20d 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -3803,7 +3803,8 @@ class ManagerTestProperties(tests_db_base.DbTestCase): self._check_driver_properties("fake_ssh", expected) def test_driver_properties_fake_pxe(self): - expected = ['deploy_kernel', 'deploy_ramdisk'] + expected = ['deploy_kernel', 'deploy_ramdisk', + 'deploy_forces_oob_reboot'] self._check_driver_properties("fake_pxe", expected) def test_driver_properties_fake_seamicro(self): @@ -3824,34 +3825,37 @@ class ManagerTestProperties(tests_db_base.DbTestCase): 'ipmi_transit_address', 'ipmi_target_channel', 'ipmi_target_address', 'ipmi_local_address', 'deploy_kernel', 'deploy_ramdisk', 'ipmi_protocol_version', - 'ipmi_force_boot_device' - ] + 'ipmi_force_boot_device', 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ipmitool", expected) def test_driver_properties_pxe_ipminative(self): expected = ['ipmi_address', 'ipmi_password', 'ipmi_username', 'deploy_kernel', 'deploy_ramdisk', - 'ipmi_terminal_port', 'ipmi_force_boot_device'] + 'ipmi_terminal_port', 'ipmi_force_boot_device', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ipminative", expected) def test_driver_properties_pxe_ssh(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'ssh_address', 'ssh_username', 'ssh_virt_type', 'ssh_key_contents', 'ssh_key_filename', - 'ssh_password', 'ssh_port', 'ssh_terminal_port'] + 'ssh_password', 'ssh_port', 'ssh_terminal_port', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_ssh", expected) def test_driver_properties_pxe_seamicro(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'seamicro_api_endpoint', 'seamicro_password', 'seamicro_server_id', 'seamicro_username', - 'seamicro_api_version', 'seamicro_terminal_port'] + 'seamicro_api_version', 'seamicro_terminal_port', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_seamicro", expected) def test_driver_properties_pxe_snmp(self): expected = ['deploy_kernel', 'deploy_ramdisk', 'snmp_driver', 'snmp_address', 'snmp_port', 'snmp_version', - 'snmp_community', 'snmp_security', 'snmp_outlet'] + 'snmp_community', 'snmp_security', 'snmp_outlet', + 'deploy_forces_oob_reboot'] self._check_driver_properties("pxe_snmp", expected) def test_driver_properties_fake_ilo(self): @@ -3862,13 +3866,15 @@ class ManagerTestProperties(tests_db_base.DbTestCase): def test_driver_properties_ilo_iscsi(self): expected = ['ilo_address', 'ilo_username', 'ilo_password', 'client_port', 'client_timeout', 'ilo_deploy_iso', - 'console_port', 'ilo_change_password'] + 'console_port', 'ilo_change_password', + 'deploy_forces_oob_reboot'] self._check_driver_properties("iscsi_ilo", expected) def test_driver_properties_agent_ilo(self): expected = ['ilo_address', 'ilo_username', 'ilo_password', 'client_port', 'client_timeout', 'ilo_deploy_iso', - 'console_port', 'ilo_change_password'] + 'console_port', 'ilo_change_password', + 'deploy_forces_oob_reboot'] self._check_driver_properties("agent_ilo", expected) def test_driver_properties_fail(self): diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index b4c1ded17..29f4ee94b 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -684,6 +684,60 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'sync', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_oob_power_off( + self, sync_mock, node_power_action_mock): + # Enable force power off + driver_info = self.node.driver_info + driver_info['deploy_forces_oob_reboot'] = True + self.node.driver_info = driver_info + + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.passthru.reboot_and_finish_deploy(task) + + sync_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(agent_base_vendor.LOG, 'warning', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'sync', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_oob_power_off_failed( + self, sync_mock, node_power_action_mock, log_mock): + # Enable force power off + driver_info = self.node.driver_info + driver_info['deploy_forces_oob_reboot'] = True + self.node.driver_info = driver_info + + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sync_mock.return_value = {'faultstring': 'Unknown command: blah'} + self.passthru.reboot_and_finish_deploy(task) + + sync_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + log_error = ('The version of the IPA ramdisk used in the ' + 'deployment do not support the command "sync"') + log_mock.assert_called_once_with( + 'Failed to flush the file system prior to hard rebooting the ' + 'node %(node)s. Error: %(error)s', + {'node': task.node.uuid, 'error': log_error}) + @mock.patch.object(agent_client.AgentClient, 'install_bootloader', autospec=True) @mock.patch.object(deploy_utils, 'try_set_boot_device', autospec=True) @@ -1276,3 +1330,7 @@ class TestRefreshCleanSteps(TestBaseAgentVendor): task) client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) + + def test_get_properties(self): + expected = agent_base_vendor.VENDOR_PROPERTIES + self.assertEqual(expected, self.passthru.get_properties()) diff --git a/ironic/tests/unit/drivers/modules/test_agent_client.py b/ironic/tests/unit/drivers/modules/test_agent_client.py index fe5be2dba..aead8915a 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_client.py +++ b/ironic/tests/unit/drivers/modules/test_agent_client.py @@ -236,3 +236,9 @@ class TestAgentClient(base.TestCase): self.client.power_off(self.node) self.client._command.assert_called_once_with( node=self.node, method='standby.power_off', params={}) + + def test_sync(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.client.sync(self.node) + self.client._command.assert_called_once_with( + node=self.node, method='standby.sync', params={}, wait=True) diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py index 745626240..9e023ea18 100644 --- a/ironic/tests/unit/drivers/modules/test_pxe.py +++ b/ironic/tests/unit/drivers/modules/test_pxe.py @@ -32,6 +32,7 @@ from ironic.common.glance_service import base_image_service from ironic.common import pxe_utils from ironic.common import states from ironic.conductor import task_manager +from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import pxe from ironic.tests.unit.conductor import mgr_utils @@ -549,6 +550,7 @@ class PXEBootTestCase(db_base.DbTestCase): def test_get_properties(self): expected = pxe.COMMON_PROPERTIES + expected.update(agent_base_vendor.VENDOR_PROPERTIES) with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: self.assertEqual(expected, task.driver.get_properties()) diff --git a/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml new file mode 100644 index 000000000..ddbbf152a --- /dev/null +++ b/releasenotes/notes/oob-power-off-7bbdf5947ed24bf8.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - Fixes a problem where some hardware/firmware (specially faulty ones) + won't come back online after an in-band ACPI soft power off by adding + a new driver property called "deploy_forces_oob_reboot" that can be set + to the nodes being deployed by the IPA ramdisk. If the value of this + property is True, Ironic will power cycle the node via out-of-band. |