diff options
author | Julia Kreger <juliaashleykreger@gmail.com> | 2019-10-23 17:54:29 -0700 |
---|---|---|
committer | Julia Kreger <juliaashleykreger@gmail.com> | 2019-12-16 22:17:22 +0000 |
commit | 25cc871450e6b2ebe4206bad54ed09105abed2a1 (patch) | |
tree | 0935b1bcf5568f4bb2637c355e24779084a2eb35 /ironic | |
parent | 3e39d7becb75bc92fc4ab00c48b930e924ead86d (diff) | |
download | ironic-25cc871450e6b2ebe4206bad54ed09105abed2a1.tar.gz |
Block ability update callback_url
A malicious user with:
* API access normally reserved for the provisioning,
cleaning, rescue networks.
* Insight about a node, such as a MAC address, or baremetal node
UUID.
* Insight into the state of the node, such as the access provided
to Compute API users, or other Bare Metal API users.
Can submit an erroneous ``heartbeat`` to the ironic-api endpoint
with a ``callback_url`` that is not of the actual intended agent.
This can potentially cause a rescue, cleaning, or deployment
operation to be derailed, or at worst commands to be sent to
to an endpoint the malicious user controls.
Story: 2006773
Task: 37295
Change-Id: I1a5e3c2b34d45c06fb74e82d0f30735ce9041914
(cherry picked from commit 931c12598296dad676b2d64b66fef6e95ad3939b)
Diffstat (limited to 'ironic')
-rw-r--r-- | ironic/api/controllers/v1/ramdisk.py | 13 | ||||
-rw-r--r-- | ironic/conductor/manager.py | 21 | ||||
-rw-r--r-- | ironic/conductor/utils.py | 15 | ||||
-rw-r--r-- | ironic/tests/unit/api/controllers/v1/test_ramdisk.py | 12 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_manager.py | 38 | ||||
-rw-r--r-- | ironic/tests/unit/conductor/test_utils.py | 8 | ||||
-rw-r--r-- | ironic/tests/unit/drivers/modules/test_agent_base_vendor.py | 5 |
7 files changed, 97 insertions, 15 deletions
diff --git a/ironic/api/controllers/v1/ramdisk.py b/ironic/api/controllers/v1/ramdisk.py index d29f2ace0..cab6ef28c 100644 --- a/ironic/api/controllers/v1/ramdisk.py +++ b/ironic/api/controllers/v1/ramdisk.py @@ -184,6 +184,19 @@ class HeartbeatController(rest.RestController): policy.authorize('baremetal:node:ipa_heartbeat', cdict, cdict) rpc_node = api_utils.get_rpc_node_with_suffix(node_ident) + dii = rpc_node['driver_internal_info'] + agent_url = dii.get('agent_url') + # If we have an agent_url on file, and we get something different + # we should fail because this is unexpected behavior of the agent. + if (agent_url is not None + and agent_url != callback_url): + LOG.error('Received heartbeat for node %(node)s with ' + 'callback URL %(url)s. This is not expected, ' + 'and the heartbeat will not be processed.', + {'node': rpc_node.uuid, 'url': callback_url}) + raise exception.Invalid( + _('Detected change in ramdisk provided ' + '"callback_url"')) try: topic = api.request.rpcapi.get_topic_for(rpc_node) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 15e79dd78..0e4cbb28e 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -598,6 +598,8 @@ class ConductorManager(base_manager.BaseConductorManager): node_id, purpose='node rescue') as task: node = task.node + # Record of any pre-existing agent_url should be removed. + utils.remove_agent_url(node) if node.maintenance: raise exception.NodeInMaintenance(op=_('rescuing'), node=node.uuid) @@ -697,6 +699,9 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, purpose='node unrescue') as task: node = task.node + # Record of any pre-existing agent_url should be removed, + # Not that there should be. + utils.remove_agent_url(node) if node.maintenance: raise exception.NodeInMaintenance(op=_('unrescuing'), node=node.uuid) @@ -776,6 +781,7 @@ class ConductorManager(base_manager.BaseConductorManager): info_message = _('Rescue operation aborted for node %s.') % node.uuid last_error = _('By request, the rescue operation was aborted.') node.refresh() + utils.remove_agent_url(node) node.last_error = last_error node.save() LOG.info(info_message) @@ -819,6 +825,8 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, purpose='node deployment') as task: node = task.node + # Record of any pre-existing agent_url should be removed. + utils.remove_agent_url(node) if node.maintenance: raise exception.NodeInMaintenance(op=_('provisioning'), node=node.uuid) @@ -972,6 +980,8 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, purpose='node tear down') as task: + # Record of any pre-existing agent_url should be removed. + utils.remove_agent_url(task.node) if task.node.protected: raise exception.NodeProtected(node=task.node.uuid) @@ -1168,7 +1178,8 @@ class ConductorManager(base_manager.BaseConductorManager): with task_manager.acquire(context, node_id, shared=False, purpose='node manual cleaning') as task: node = task.node - + # Record of any pre-existing agent_url should be removed. + utils.remove_agent_url(node) if node.maintenance: raise exception.NodeInMaintenance(op=_('cleaning'), node=node.uuid) @@ -1473,6 +1484,8 @@ class ConductorManager(base_manager.BaseConductorManager): driver_internal_info.pop('clean_step_index', None) driver_internal_info.pop('cleaning_reboot', None) driver_internal_info.pop('cleaning_polling', None) + # Remove agent_url + driver_internal_info.pop('agent_url', None) node.driver_internal_info = driver_internal_info node.save() try: @@ -1562,6 +1575,7 @@ class ConductorManager(base_manager.BaseConductorManager): info.pop('cleaning_reboot', None) info.pop('cleaning_polling', None) info.pop('skip_current_clean_step', None) + info.pop('agent_url', None) node.driver_internal_info = info node.save() LOG.info(info_message) @@ -3979,6 +3993,8 @@ def _do_next_deploy_step(task, step_index, conductor_id): driver_internal_info.pop('deploy_step_index', None) driver_internal_info.pop('deployment_reboot', None) driver_internal_info.pop('deployment_polling', None) + # Remove the agent_url cached from the deployment. + driver_internal_info.pop('agent_url', None) node.driver_internal_info = driver_internal_info node.save() @@ -4202,6 +4218,9 @@ def _do_inspect_hardware(task): log_func("Failed to inspect node %(node)s: %(err)s", {'node': node.uuid, 'err': e}) + # Remove agent_url, while not strictly needed for the inspection path, + # lets just remove it out of good practice. + utils.remove_agent_url(node) try: new_state = task.driver.inspect.inspect_hardware(task) except exception.IronicException as e: diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 1c6898c44..43aaeee7e 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -417,6 +417,9 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True, info.pop('cleaning_reboot', None) info.pop('cleaning_polling', None) info.pop('skip_current_clean_step', None) + # We don't need to keep the old agent URL + # as it should change upon the next cleaning attempt. + info.pop('agent_url', None) node.driver_internal_info = info # For manual cleaning, the target provision state is MANAGEABLE, whereas # for automated cleaning, it is AVAILABLE. @@ -477,6 +480,9 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False, info.pop('deployment_reboot', None) info.pop('deployment_polling', None) info.pop('skip_current_deploy_step', None) + # Remove agent_url since it will be re-asserted + # upon the next deployment attempt. + info.pop('agent_url', None) node.driver_internal_info = info if cleanup_err: @@ -521,6 +527,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True): try: node_power_action(task, states.POWER_OFF) task.driver.rescue.clean_up(task) + remove_agent_url(node) node.last_error = msg except exception.IronicException as e: node.last_error = (_('Rescue operation was unsuccessful, clean up ' @@ -535,6 +542,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True): LOG.exception('Rescue failed for node %(node)s, an exception was ' 'encountered while aborting.', {'node': node.uuid}) finally: + remove_agent_url(node) node.save() if set_fail_state: @@ -913,3 +921,10 @@ def is_fast_track(task): task.node.driver_internal_info.get('agent_last_heartbeat'), CONF.deploy.fast_track_timeout) and task.driver.power.get_power_state(task) == states.POWER_ON) + + +def remove_agent_url(node): + """Helper to remove the agent_url record.""" + info = node.driver_internal_info + info.pop('agent_url', None) + node.driver_internal_info = info diff --git a/ironic/tests/unit/api/controllers/v1/test_ramdisk.py b/ironic/tests/unit/api/controllers/v1/test_ramdisk.py index 43162adc1..0bdd9dfbe 100644 --- a/ironic/tests/unit/api/controllers/v1/test_ramdisk.py +++ b/ironic/tests/unit/api/controllers/v1/test_ramdisk.py @@ -255,3 +255,15 @@ class TestHeartbeat(test_api_base.BaseApiTest): headers={api_base.Version.string: '1.35'}, expect_errors=True) self.assertEqual(http_client.BAD_REQUEST, response.status_int) + + @mock.patch.object(rpcapi.ConductorAPI, 'heartbeat', autospec=True) + def test_heartbeat_rejects_different_callback_url(self, mock_heartbeat): + node = obj_utils.create_test_node( + self.context, + driver_internal_info={'agent_url': 'url'}) + response = self.post_json( + '/heartbeat/%s' % node.uuid, + {'callback_url': 'url2'}, + headers={api_base.Version.string: str(api_v1.max_version())}, + expect_errors=True) + self.assertEqual(http_client.BAD_REQUEST, response.status_int) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index ac0b60716..ffede6955 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -1352,9 +1352,11 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, autospec=True) as mock_spawn: mock_spawn.return_value = thread - node = obj_utils.create_test_node(self.context, - driver='fake-hardware', - provision_state=states.AVAILABLE) + node = obj_utils.create_test_node( + self.context, + driver='fake-hardware', + provision_state=states.AVAILABLE, + driver_internal_info={'agent_url': 'url'}) self.service.do_node_deploy(self.context, node.uuid) self._stop_service() @@ -1369,6 +1371,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin, mock.ANY, None) mock_iwdi.assert_called_once_with(self.context, node.instance_info) self.assertFalse(node.driver_internal_info['is_whole_disk_image']) + self.assertNotIn('agent_url', node.driver_internal_info) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy') def test_do_node_deploy_rebuild_active_state_old(self, mock_deploy, @@ -2658,7 +2661,8 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, def test__do_next_deploy_step_all(self, mock_execute): # Run all steps from start to finish (all synchronous) driver_internal_info = {'deploy_step_index': None, - 'deploy_steps': self.deploy_steps} + 'deploy_steps': self.deploy_steps, + 'agent_url': 'url'} self._start_service() node = obj_utils.create_test_node( self.context, driver='fake-hardware', @@ -2680,6 +2684,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin, self.assertIsNone(node.driver_internal_info['deploy_steps']) mock_execute.assert_has_calls = [mock.call(self.deploy_steps[0]), mock.call(self.deploy_steps[1])] + self.assertNotIn('agent_url', node.driver_internal_info) @mock.patch.object(conductor_utils, 'LOG', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step', @@ -3854,7 +3859,8 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.context, driver='fake-hardware', provision_state=states.CLEANING, target_provision_state=states.AVAILABLE, - last_error=None) + last_error=None, + driver_internal_info={'agent_url': 'url'}) with task_manager.acquire( self.context, node.uuid, shared=False) as task: self.service._do_node_clean(task) @@ -3864,6 +3870,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): # Assert that the node was cleaned self.assertTrue(mock_validate.called) self.assertIn('clean_steps', node.driver_internal_info) + self.assertNotIn('agent_url', node.driver_internal_info) @mock.patch('ironic.drivers.modules.fake.FakePower.validate', autospec=True) @@ -4599,7 +4606,8 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, task = self._create_task( node_attrs=dict(driver='fake-hardware', provision_state=states.ACTIVE, - instance_info={})) + instance_info={}, + driver_internal_info={'agent_url': 'url'})) mock_acquire.side_effect = self._get_acquire_side_effect(task) self.service.do_node_rescue(self.context, task.node.uuid, "password") @@ -4609,6 +4617,7 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, call_args=(self.service._do_node_rescue, task), err_handler=conductor_utils.spawn_rescue_error_handler) self.assertIn('rescue_password', task.node.instance_info) + self.assertNotIn('agent_url', task.node.driver_internal_info) def test_do_node_rescue_invalid_state(self): self._start_service() @@ -4740,9 +4749,12 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, self._start_service() task = self._create_task( node_attrs=dict(driver='fake-hardware', - provision_state=states.RESCUE)) + provision_state=states.RESCUE, + driver_internal_info={'agent_url': 'url'})) mock_acquire.side_effect = self._get_acquire_side_effect(task) self.service.do_node_unrescue(self.context, task.node.uuid) + task.node.refresh() + self.assertNotIn('agent_url', task.node.driver_internal_info) task.process_event.assert_called_once_with( 'unrescue', callback=self.service._spawn_worker, @@ -4876,12 +4888,14 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin, node = obj_utils.create_test_node( self.context, driver='fake-hardware', provision_state=states.RESCUEFAIL, - target_provision_state=states.RESCUE) + target_provision_state=states.RESCUE, + driver_internal_info={'agent_url': 'url'}) with task_manager.acquire(self.context, node.uuid) as task: self.service._do_node_rescue_abort(task) clean_up_mock.assert_called_once_with(task.driver.rescue, task) self.assertIsNotNone(task.node.last_error) self.assertFalse(task.node.maintenance) + self.assertNotIn('agent_url', task.node.driver_internal_info) @mock.patch.object(fake.FakeRescue, 'clean_up', autospec=True) def test__do_node_rescue_abort_clean_up_fail(self, clean_up_mock): @@ -7872,8 +7886,10 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware') def test_inspect_hardware_ok(self, mock_inspect): self._start_service() - node = obj_utils.create_test_node(self.context, driver='fake-hardware', - provision_state=states.INSPECTING) + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.INSPECTING, + driver_internal_info={'agent_url': 'url'}) task = task_manager.TaskManager(self.context, node.uuid) mock_inspect.return_value = states.MANAGEABLE manager._do_inspect_hardware(task) @@ -7882,6 +7898,8 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertIsNone(node.last_error) mock_inspect.assert_called_once_with(mock.ANY) + task.node.refresh() + self.assertNotIn('agent_url', task.node.driver_internal_info) @mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware') def test_inspect_hardware_return_inspecting(self, mock_inspect): diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 1526b2ec9..86d51b705 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -920,6 +920,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase): info['deployment_reboot'] = True info['deployment_polling'] = True info['skip_current_deploy_step'] = True + info['agent_url'] = 'url' conductor_utils.deploying_error_handler(self.task, self.logmsg, self.errmsg) @@ -932,6 +933,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase): self.assertNotIn('deployment_polling', self.node.driver_internal_info) self.assertNotIn('skip_current_deploy_step', self.node.driver_internal_info) + self.assertNotIn('agent_url', self.node.driver_internal_info) self.task.process_event.assert_called_once_with('fail') def _test_deploying_error_handler_cleanup(self, exc, expected_str): @@ -1059,7 +1061,8 @@ class ErrorHandlersTestCase(tests_base.TestCase): 'cleaning_reboot': True, 'cleaning_polling': True, 'skip_current_clean_step': True, - 'clean_step_index': 0} + 'clean_step_index': 0, + 'agent_url': 'url'} msg = 'error bar' conductor_utils.cleaning_error_handler(self.task, msg) self.node.save.assert_called_once_with() @@ -1080,6 +1083,7 @@ class ErrorHandlersTestCase(tests_base.TestCase): else: self.task.process_event.assert_called_once_with('fail', target_state=None) + self.assertNotIn('agent_url', self.node.driver_internal_info) def test_cleaning_error_handler(self): self._test_cleaning_error_handler() @@ -1254,12 +1258,14 @@ class ErrorHandlersTestCase(tests_base.TestCase): def _test_rescuing_error_handler(self, node_power_mock, set_state=True): self.node.provision_state = states.RESCUEWAIT + self.node.driver_internal_info.update({'agent_url': 'url'}) conductor_utils.rescuing_error_handler(self.task, 'some exception for node', set_fail_state=set_state) node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) self.task.driver.rescue.clean_up.assert_called_once_with(self.task) self.node.save.assert_called_once_with() + self.assertNotIn('agent_url', self.node.driver_internal_info) if set_state: self.assertTrue(self.task.process_event.called) else: diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index afb14c02f..6710b9729 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -269,9 +269,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): shared=True) as task: self.deploy.heartbeat(task, agent_url, '3.2.0') self.assertFalse(task.shared) - self.assertEqual( - agent_url, - task.node.driver_internal_info['agent_url']) + self.assertIsNone( + task.node.driver_internal_info.get('agent_url', None)) self.assertEqual( '3.2.0', task.node.driver_internal_info['agent_version']) |