diff options
author | Ramakrishnan G <rameshg87@gmail.com> | 2015-05-26 14:32:02 +0100 |
---|---|---|
committer | Ramakrishnan G <rameshg87@gmail.com> | 2015-06-12 07:58:34 +0000 |
commit | 7adbf0622c8c2c3c52cdb24b235fca4b3afef8d3 (patch) | |
tree | 22430814d79ccc50181effe8b057048034ed9f9d /ironic | |
parent | 309707bab00a8a30d473815687515ac906ffbeb2 (diff) | |
download | ironic-7adbf0622c8c2c3c52cdb24b235fca4b3afef8d3.tar.gz |
IPA: Do a soft power off at the end of deployment
For IPA we currently do a hard reboot at the end of the deployment,
but that doesn't always play nice with some systems specially when they
have to deal with UEFI. So this patch is changing the code in Ironic to
actually tell the ramdisk to power off itself instead of hard powering
off the machine from BMC. This will give the kernel the necessary
time to finish sync'ing up whatever it has to sync before rebooting
the node.
Depends-On: I2a5f984af26bbbe03002bb8c367c8c6af8d91434
Co-Authored-By: Lucas Alvares Gomes <lucasagomes@gmail.com>
Closes-Bug: #1451310
Change-Id: I831d8dc1d15c82a0caab94173c2b9f147017500f
Diffstat (limited to 'ironic')
-rw-r--r-- | ironic/drivers/modules/agent_base_vendor.py | 42 | ||||
-rw-r--r-- | ironic/drivers/modules/agent_client.py | 6 | ||||
-rw-r--r-- | ironic/tests/drivers/test_agent.py | 19 | ||||
-rw-r--r-- | ironic/tests/drivers/test_agent_base_vendor.py | 113 | ||||
-rw-r--r-- | ironic/tests/drivers/test_agent_client.py | 6 |
5 files changed, 168 insertions, 18 deletions
diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index fd2869691..fc7d05bd2 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -22,6 +22,7 @@ import time from oslo_config import cfg from oslo_log import log from oslo_utils import excutils +import retrying from ironic.common import boot_devices from ironic.common import exception @@ -43,6 +44,15 @@ agent_opts = [ cfg.IntOpt('heartbeat_timeout', default=300, help='Maximum interval (in seconds) for agent heartbeats.'), + cfg.IntOpt('post_deploy_get_power_state_retries', + default=6, + help='Number of times to retry getting power state to check ' + 'if bare metal node has been powered off after a soft ' + 'power off.'), + cfg.IntOpt('post_deploy_get_power_state_retry_interval', + default=5, + help='Amount of time (in seconds) to wait between polling ' + 'power state after trigger soft poweroff.'), ] CONF = cfg.CONF @@ -438,11 +448,37 @@ class BaseAgentVendor(base.VendorInterface): :param task: a TaskManager object containing the node :raises: InstanceDeployFailure, if node reboot failed. """ + wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000 + attempts = CONF.agent.post_deploy_get_power_state_retries + 1 + + @retrying.retry( + stop_max_attempt_number=attempts, + retry_on_result=lambda state: state != states.POWER_OFF, + wait_fixed=wait + ) + def _wait_until_powered_off(task): + return task.driver.power.get_power_state(task) + + node = task.node + try: - manager_utils.node_power_action(task, states.REBOOT) + try: + self._client.power_off(node) + _wait_until_powered_off(task) + except Exception as e: + LOG.warning( + _LW('Failed to soft power off node %(node_uuid)s ' + 'in at least %(timeout)d seconds. Error: %(error)s'), + {'node_uuid': node.uuid, + 'timeout': wait * (attempts - 1), + 'error': e}) + manager_utils.node_power_action(task, states.REBOOT) + else: + manager_utils.node_power_action(task, states.POWER_ON) except Exception as e: - msg = (_('Error rebooting node %(node)s. Error: %(error)s') % - {'node': task.node.uuid, 'error': e}) + msg = (_('Error rebooting node %(node)s after deploy. ' + 'Error: %(error)s') % + {'node': node.uuid, 'error': e}) self._log_and_raise_deployment_error(task, msg) task.process_event('done') diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index ba19a27be..9f550e364 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -142,3 +142,9 @@ class AgentClient(object): method='clean.execute_clean_step', params=params, wait=False) + + def power_off(self, node): + """Soft powers off the bare metal node by shutting down ramdisk OS.""" + return self._command(node=node, + method='standby.power_off', + params={}) diff --git a/ironic/tests/drivers/test_agent.py b/ironic/tests/drivers/test_agent.py index 694aa9004..8d9c9b131 100644 --- a/ironic/tests/drivers/test_agent.py +++ b/ironic/tests/drivers/test_agent.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import types + import mock from oslo_config import cfg @@ -22,8 +24,10 @@ from ironic.common import keystone from ironic.common import pxe_utils from ironic.common import states from ironic.conductor import task_manager +from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent from ironic.drivers.modules import agent_client +from ironic.drivers.modules import fake from ironic.tests.conductor import utils as mgr_utils from ironic.tests.db import base as db_base from ironic.tests.db import utils as db_utils @@ -421,12 +425,17 @@ class TestAgentVendor(db_base.DbTestCase): self.assertEqual(states.ACTIVE, task.node.target_provision_state) - @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) @mock.patch('ironic.conductor.utils.node_set_boot_device', autospec=True) @mock.patch('ironic.drivers.modules.agent.AgentVendorInterface' '.check_deploy_success', autospec=True) def test_reboot_to_instance(self, check_deploy_mock, bootdev_mock, - power_mock): + power_off_mock, get_power_state_mock, + node_power_action_mock): check_deploy_mock.return_value = None self.node.provision_state = states.DEPLOYING @@ -435,11 +444,15 @@ class TestAgentVendor(db_base.DbTestCase): with task_manager.acquire(self.context, self.node.uuid, shared=False) as task: + get_power_state_mock.return_value = states.POWER_OFF self.passthru.reboot_to_instance(task) check_deploy_mock.assert_called_once_with(mock.ANY, task.node) bootdev_mock.assert_called_once_with(task, 'disk', persistent=True) - power_mock.assert_called_once_with(task, states.REBOOT) + power_off_mock.assert_called_once_with(task.node) + get_power_state_mock.assert_called_once_with(task) + node_power_action_mock.assert_called_once_with( + task, states.POWER_ON) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) diff --git a/ironic/tests/drivers/test_agent_base_vendor.py b/ironic/tests/drivers/test_agent_base_vendor.py index 410d55fa4..b686f41b8 100644 --- a/ironic/tests/drivers/test_agent_base_vendor.py +++ b/ironic/tests/drivers/test_agent_base_vendor.py @@ -15,6 +15,7 @@ # License for the specific language governing permissions and limitations # under the License. +import time import types import mock @@ -27,6 +28,7 @@ from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils +from ironic.drivers.modules import fake from ironic import objects from ironic.tests.conductor import utils as mgr_utils from ironic.tests.db import base as db_base @@ -332,31 +334,118 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertIsInstance(driver_routes, dict) self.assertEqual(expected, list(driver_routes)) + @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test_reboot_and_finish_deploy_success(self, node_power_action_mock): + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy(self, power_off_mock, + get_power_state_mock, + node_power_action_mock): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.side_effect = [states.POWER_ON, + states.POWER_OFF] self.passthru.reboot_and_finish_deploy(task) - node_power_action_mock.assert_called_once_with(task, states.REBOOT) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(2, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.POWER_ON) self.assertEqual(states.ACTIVE, task.node.provision_state) self.assertEqual(states.NOSTATE, task.node.target_provision_state) + @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test_reboot_and_finish_deploy_reboot_failure(self, - node_power_action_mock): - exc = exception.PowerStateFailure(pstate=states.REBOOT) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_soft_poweroff_doesnt_complete( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): self.node.provision_state = states.DEPLOYING self.node.target_provision_state = states.ACTIVE self.node.save() - node_power_action_mock.side_effect = exc - with task_manager.acquire(self.context, self.node['uuid'], - shared=False) as task: + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.return_value = states.POWER_ON + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_soft_poweroff_fails( + self, power_off_mock, node_power_action_mock): + power_off_mock.side_effect = RuntimeError("boom") + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_get_power_state_fails( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.side_effect = RuntimeError("boom") + self.passthru.reboot_and_finish_deploy(task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_called_once_with( + task, states.REBOOT) + self.assertEqual(states.ACTIVE, task.node.provision_state) + self.assertEqual(states.NOSTATE, task.node.target_provision_state) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(fake.FakePower, 'get_power_state', + spec=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'power_off', + spec=types.FunctionType) + def test_reboot_and_finish_deploy_power_action_fails( + self, power_off_mock, get_power_state_mock, + node_power_action_mock): + self.node.provision_state = states.DEPLOYING + self.node.target_provision_state = states.ACTIVE + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + get_power_state_mock.return_value = states.POWER_ON + node_power_action_mock.side_effect = RuntimeError("boom") self.assertRaises(exception.InstanceDeployFailure, - self.passthru.reboot_and_finish_deploy, task) - node_power_action_mock.assert_any_call(task, states.REBOOT) + self.passthru.reboot_and_finish_deploy, + task) + power_off_mock.assert_called_once_with(task.node) + self.assertEqual(7, get_power_state_mock.call_count) + node_power_action_mock.assert_has_calls([ + mock.call(task, states.REBOOT), + mock.call(task, states.POWER_OFF)]) self.assertEqual(states.DEPLOYFAIL, task.node.provision_state) self.assertEqual(states.ACTIVE, task.node.target_provision_state) diff --git a/ironic/tests/drivers/test_agent_client.py b/ironic/tests/drivers/test_agent_client.py index 92662511f..9fb1e43a1 100644 --- a/ironic/tests/drivers/test_agent_client.py +++ b/ironic/tests/drivers/test_agent_client.py @@ -210,3 +210,9 @@ class TestAgentClient(base.TestCase): self.client._command.assert_called_once_with( node=self.node, method='clean.execute_clean_step', params=expected_params, wait=False) + + def test_power_off(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.client.power_off(self.node) + self.client._command.assert_called_once_with( + node=self.node, method='standby.power_off', params={}) |