summaryrefslogtreecommitdiff
path: root/ironic
diff options
context:
space:
mode:
authorJulia Kreger <juliaashleykreger@gmail.com>2019-10-23 17:54:29 -0700
committerJulia Kreger <juliaashleykreger@gmail.com>2019-12-16 22:17:22 +0000
commit25cc871450e6b2ebe4206bad54ed09105abed2a1 (patch)
tree0935b1bcf5568f4bb2637c355e24779084a2eb35 /ironic
parent3e39d7becb75bc92fc4ab00c48b930e924ead86d (diff)
downloadironic-25cc871450e6b2ebe4206bad54ed09105abed2a1.tar.gz
Block ability update callback_url
A malicious user with: * API access normally reserved for the provisioning, cleaning, rescue networks. * Insight about a node, such as a MAC address, or baremetal node UUID. * Insight into the state of the node, such as the access provided to Compute API users, or other Bare Metal API users. Can submit an erroneous ``heartbeat`` to the ironic-api endpoint with a ``callback_url`` that is not of the actual intended agent. This can potentially cause a rescue, cleaning, or deployment operation to be derailed, or at worst commands to be sent to to an endpoint the malicious user controls. Story: 2006773 Task: 37295 Change-Id: I1a5e3c2b34d45c06fb74e82d0f30735ce9041914 (cherry picked from commit 931c12598296dad676b2d64b66fef6e95ad3939b)
Diffstat (limited to 'ironic')
-rw-r--r--ironic/api/controllers/v1/ramdisk.py13
-rw-r--r--ironic/conductor/manager.py21
-rw-r--r--ironic/conductor/utils.py15
-rw-r--r--ironic/tests/unit/api/controllers/v1/test_ramdisk.py12
-rw-r--r--ironic/tests/unit/conductor/test_manager.py38
-rw-r--r--ironic/tests/unit/conductor/test_utils.py8
-rw-r--r--ironic/tests/unit/drivers/modules/test_agent_base_vendor.py5
7 files changed, 97 insertions, 15 deletions
diff --git a/ironic/api/controllers/v1/ramdisk.py b/ironic/api/controllers/v1/ramdisk.py
index d29f2ace0..cab6ef28c 100644
--- a/ironic/api/controllers/v1/ramdisk.py
+++ b/ironic/api/controllers/v1/ramdisk.py
@@ -184,6 +184,19 @@ class HeartbeatController(rest.RestController):
policy.authorize('baremetal:node:ipa_heartbeat', cdict, cdict)
rpc_node = api_utils.get_rpc_node_with_suffix(node_ident)
+ dii = rpc_node['driver_internal_info']
+ agent_url = dii.get('agent_url')
+ # If we have an agent_url on file, and we get something different
+ # we should fail because this is unexpected behavior of the agent.
+ if (agent_url is not None
+ and agent_url != callback_url):
+ LOG.error('Received heartbeat for node %(node)s with '
+ 'callback URL %(url)s. This is not expected, '
+ 'and the heartbeat will not be processed.',
+ {'node': rpc_node.uuid, 'url': callback_url})
+ raise exception.Invalid(
+ _('Detected change in ramdisk provided '
+ '"callback_url"'))
try:
topic = api.request.rpcapi.get_topic_for(rpc_node)
diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py
index 15e79dd78..0e4cbb28e 100644
--- a/ironic/conductor/manager.py
+++ b/ironic/conductor/manager.py
@@ -598,6 +598,8 @@ class ConductorManager(base_manager.BaseConductorManager):
node_id, purpose='node rescue') as task:
node = task.node
+ # Record of any pre-existing agent_url should be removed.
+ utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('rescuing'),
node=node.uuid)
@@ -697,6 +699,9 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id,
purpose='node unrescue') as task:
node = task.node
+ # Record of any pre-existing agent_url should be removed,
+ # Not that there should be.
+ utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('unrescuing'),
node=node.uuid)
@@ -776,6 +781,7 @@ class ConductorManager(base_manager.BaseConductorManager):
info_message = _('Rescue operation aborted for node %s.') % node.uuid
last_error = _('By request, the rescue operation was aborted.')
node.refresh()
+ utils.remove_agent_url(node)
node.last_error = last_error
node.save()
LOG.info(info_message)
@@ -819,6 +825,8 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id, shared=False,
purpose='node deployment') as task:
node = task.node
+ # Record of any pre-existing agent_url should be removed.
+ utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('provisioning'),
node=node.uuid)
@@ -972,6 +980,8 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id, shared=False,
purpose='node tear down') as task:
+ # Record of any pre-existing agent_url should be removed.
+ utils.remove_agent_url(task.node)
if task.node.protected:
raise exception.NodeProtected(node=task.node.uuid)
@@ -1168,7 +1178,8 @@ class ConductorManager(base_manager.BaseConductorManager):
with task_manager.acquire(context, node_id, shared=False,
purpose='node manual cleaning') as task:
node = task.node
-
+ # Record of any pre-existing agent_url should be removed.
+ utils.remove_agent_url(node)
if node.maintenance:
raise exception.NodeInMaintenance(op=_('cleaning'),
node=node.uuid)
@@ -1473,6 +1484,8 @@ class ConductorManager(base_manager.BaseConductorManager):
driver_internal_info.pop('clean_step_index', None)
driver_internal_info.pop('cleaning_reboot', None)
driver_internal_info.pop('cleaning_polling', None)
+ # Remove agent_url
+ driver_internal_info.pop('agent_url', None)
node.driver_internal_info = driver_internal_info
node.save()
try:
@@ -1562,6 +1575,7 @@ class ConductorManager(base_manager.BaseConductorManager):
info.pop('cleaning_reboot', None)
info.pop('cleaning_polling', None)
info.pop('skip_current_clean_step', None)
+ info.pop('agent_url', None)
node.driver_internal_info = info
node.save()
LOG.info(info_message)
@@ -3979,6 +3993,8 @@ def _do_next_deploy_step(task, step_index, conductor_id):
driver_internal_info.pop('deploy_step_index', None)
driver_internal_info.pop('deployment_reboot', None)
driver_internal_info.pop('deployment_polling', None)
+ # Remove the agent_url cached from the deployment.
+ driver_internal_info.pop('agent_url', None)
node.driver_internal_info = driver_internal_info
node.save()
@@ -4202,6 +4218,9 @@ def _do_inspect_hardware(task):
log_func("Failed to inspect node %(node)s: %(err)s",
{'node': node.uuid, 'err': e})
+ # Remove agent_url, while not strictly needed for the inspection path,
+ # lets just remove it out of good practice.
+ utils.remove_agent_url(node)
try:
new_state = task.driver.inspect.inspect_hardware(task)
except exception.IronicException as e:
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 1c6898c44..43aaeee7e 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -417,6 +417,9 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
info.pop('cleaning_reboot', None)
info.pop('cleaning_polling', None)
info.pop('skip_current_clean_step', None)
+ # We don't need to keep the old agent URL
+ # as it should change upon the next cleaning attempt.
+ info.pop('agent_url', None)
node.driver_internal_info = info
# For manual cleaning, the target provision state is MANAGEABLE, whereas
# for automated cleaning, it is AVAILABLE.
@@ -477,6 +480,9 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False,
info.pop('deployment_reboot', None)
info.pop('deployment_polling', None)
info.pop('skip_current_deploy_step', None)
+ # Remove agent_url since it will be re-asserted
+ # upon the next deployment attempt.
+ info.pop('agent_url', None)
node.driver_internal_info = info
if cleanup_err:
@@ -521,6 +527,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
try:
node_power_action(task, states.POWER_OFF)
task.driver.rescue.clean_up(task)
+ remove_agent_url(node)
node.last_error = msg
except exception.IronicException as e:
node.last_error = (_('Rescue operation was unsuccessful, clean up '
@@ -535,6 +542,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
LOG.exception('Rescue failed for node %(node)s, an exception was '
'encountered while aborting.', {'node': node.uuid})
finally:
+ remove_agent_url(node)
node.save()
if set_fail_state:
@@ -913,3 +921,10 @@ def is_fast_track(task):
task.node.driver_internal_info.get('agent_last_heartbeat'),
CONF.deploy.fast_track_timeout)
and task.driver.power.get_power_state(task) == states.POWER_ON)
+
+
+def remove_agent_url(node):
+ """Helper to remove the agent_url record."""
+ info = node.driver_internal_info
+ info.pop('agent_url', None)
+ node.driver_internal_info = info
diff --git a/ironic/tests/unit/api/controllers/v1/test_ramdisk.py b/ironic/tests/unit/api/controllers/v1/test_ramdisk.py
index 43162adc1..0bdd9dfbe 100644
--- a/ironic/tests/unit/api/controllers/v1/test_ramdisk.py
+++ b/ironic/tests/unit/api/controllers/v1/test_ramdisk.py
@@ -255,3 +255,15 @@ class TestHeartbeat(test_api_base.BaseApiTest):
headers={api_base.Version.string: '1.35'},
expect_errors=True)
self.assertEqual(http_client.BAD_REQUEST, response.status_int)
+
+ @mock.patch.object(rpcapi.ConductorAPI, 'heartbeat', autospec=True)
+ def test_heartbeat_rejects_different_callback_url(self, mock_heartbeat):
+ node = obj_utils.create_test_node(
+ self.context,
+ driver_internal_info={'agent_url': 'url'})
+ response = self.post_json(
+ '/heartbeat/%s' % node.uuid,
+ {'callback_url': 'url2'},
+ headers={api_base.Version.string: str(api_v1.max_version())},
+ expect_errors=True)
+ self.assertEqual(http_client.BAD_REQUEST, response.status_int)
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index ac0b60716..ffede6955 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -1352,9 +1352,11 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
autospec=True) as mock_spawn:
mock_spawn.return_value = thread
- node = obj_utils.create_test_node(self.context,
- driver='fake-hardware',
- provision_state=states.AVAILABLE)
+ node = obj_utils.create_test_node(
+ self.context,
+ driver='fake-hardware',
+ provision_state=states.AVAILABLE,
+ driver_internal_info={'agent_url': 'url'})
self.service.do_node_deploy(self.context, node.uuid)
self._stop_service()
@@ -1369,6 +1371,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
mock.ANY, None)
mock_iwdi.assert_called_once_with(self.context, node.instance_info)
self.assertFalse(node.driver_internal_info['is_whole_disk_image'])
+ self.assertNotIn('agent_url', node.driver_internal_info)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
def test_do_node_deploy_rebuild_active_state_old(self, mock_deploy,
@@ -2658,7 +2661,8 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
def test__do_next_deploy_step_all(self, mock_execute):
# Run all steps from start to finish (all synchronous)
driver_internal_info = {'deploy_step_index': None,
- 'deploy_steps': self.deploy_steps}
+ 'deploy_steps': self.deploy_steps,
+ 'agent_url': 'url'}
self._start_service()
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
@@ -2680,6 +2684,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
self.assertIsNone(node.driver_internal_info['deploy_steps'])
mock_execute.assert_has_calls = [mock.call(self.deploy_steps[0]),
mock.call(self.deploy_steps[1])]
+ self.assertNotIn('agent_url', node.driver_internal_info)
@mock.patch.object(conductor_utils, 'LOG', autospec=True)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
@@ -3854,7 +3859,8 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.context, driver='fake-hardware',
provision_state=states.CLEANING,
target_provision_state=states.AVAILABLE,
- last_error=None)
+ last_error=None,
+ driver_internal_info={'agent_url': 'url'})
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
self.service._do_node_clean(task)
@@ -3864,6 +3870,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
# Assert that the node was cleaned
self.assertTrue(mock_validate.called)
self.assertIn('clean_steps', node.driver_internal_info)
+ self.assertNotIn('agent_url', node.driver_internal_info)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
autospec=True)
@@ -4599,7 +4606,8 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
task = self._create_task(
node_attrs=dict(driver='fake-hardware',
provision_state=states.ACTIVE,
- instance_info={}))
+ instance_info={},
+ driver_internal_info={'agent_url': 'url'}))
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_node_rescue(self.context, task.node.uuid,
"password")
@@ -4609,6 +4617,7 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
call_args=(self.service._do_node_rescue, task),
err_handler=conductor_utils.spawn_rescue_error_handler)
self.assertIn('rescue_password', task.node.instance_info)
+ self.assertNotIn('agent_url', task.node.driver_internal_info)
def test_do_node_rescue_invalid_state(self):
self._start_service()
@@ -4740,9 +4749,12 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
self._start_service()
task = self._create_task(
node_attrs=dict(driver='fake-hardware',
- provision_state=states.RESCUE))
+ provision_state=states.RESCUE,
+ driver_internal_info={'agent_url': 'url'}))
mock_acquire.side_effect = self._get_acquire_side_effect(task)
self.service.do_node_unrescue(self.context, task.node.uuid)
+ task.node.refresh()
+ self.assertNotIn('agent_url', task.node.driver_internal_info)
task.process_event.assert_called_once_with(
'unrescue',
callback=self.service._spawn_worker,
@@ -4876,12 +4888,14 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.RESCUEFAIL,
- target_provision_state=states.RESCUE)
+ target_provision_state=states.RESCUE,
+ driver_internal_info={'agent_url': 'url'})
with task_manager.acquire(self.context, node.uuid) as task:
self.service._do_node_rescue_abort(task)
clean_up_mock.assert_called_once_with(task.driver.rescue, task)
self.assertIsNotNone(task.node.last_error)
self.assertFalse(task.node.maintenance)
+ self.assertNotIn('agent_url', task.node.driver_internal_info)
@mock.patch.object(fake.FakeRescue, 'clean_up', autospec=True)
def test__do_node_rescue_abort_clean_up_fail(self, clean_up_mock):
@@ -7872,8 +7886,10 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_ok(self, mock_inspect):
self._start_service()
- node = obj_utils.create_test_node(self.context, driver='fake-hardware',
- provision_state=states.INSPECTING)
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ provision_state=states.INSPECTING,
+ driver_internal_info={'agent_url': 'url'})
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task)
@@ -7882,6 +7898,8 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertIsNone(node.last_error)
mock_inspect.assert_called_once_with(mock.ANY)
+ task.node.refresh()
+ self.assertNotIn('agent_url', task.node.driver_internal_info)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_return_inspecting(self, mock_inspect):
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index 1526b2ec9..86d51b705 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -920,6 +920,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
info['deployment_reboot'] = True
info['deployment_polling'] = True
info['skip_current_deploy_step'] = True
+ info['agent_url'] = 'url'
conductor_utils.deploying_error_handler(self.task, self.logmsg,
self.errmsg)
@@ -932,6 +933,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
self.assertNotIn('deployment_polling', self.node.driver_internal_info)
self.assertNotIn('skip_current_deploy_step',
self.node.driver_internal_info)
+ self.assertNotIn('agent_url', self.node.driver_internal_info)
self.task.process_event.assert_called_once_with('fail')
def _test_deploying_error_handler_cleanup(self, exc, expected_str):
@@ -1059,7 +1061,8 @@ class ErrorHandlersTestCase(tests_base.TestCase):
'cleaning_reboot': True,
'cleaning_polling': True,
'skip_current_clean_step': True,
- 'clean_step_index': 0}
+ 'clean_step_index': 0,
+ 'agent_url': 'url'}
msg = 'error bar'
conductor_utils.cleaning_error_handler(self.task, msg)
self.node.save.assert_called_once_with()
@@ -1080,6 +1083,7 @@ class ErrorHandlersTestCase(tests_base.TestCase):
else:
self.task.process_event.assert_called_once_with('fail',
target_state=None)
+ self.assertNotIn('agent_url', self.node.driver_internal_info)
def test_cleaning_error_handler(self):
self._test_cleaning_error_handler()
@@ -1254,12 +1258,14 @@ class ErrorHandlersTestCase(tests_base.TestCase):
def _test_rescuing_error_handler(self, node_power_mock,
set_state=True):
self.node.provision_state = states.RESCUEWAIT
+ self.node.driver_internal_info.update({'agent_url': 'url'})
conductor_utils.rescuing_error_handler(self.task,
'some exception for node',
set_fail_state=set_state)
node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF)
self.task.driver.rescue.clean_up.assert_called_once_with(self.task)
self.node.save.assert_called_once_with()
+ self.assertNotIn('agent_url', self.node.driver_internal_info)
if set_state:
self.assertTrue(self.task.process_event.called)
else:
diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
index afb14c02f..6710b9729 100644
--- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
+++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py
@@ -269,9 +269,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
shared=True) as task:
self.deploy.heartbeat(task, agent_url, '3.2.0')
self.assertFalse(task.shared)
- self.assertEqual(
- agent_url,
- task.node.driver_internal_info['agent_url'])
+ self.assertIsNone(
+ task.node.driver_internal_info.get('agent_url', None))
self.assertEqual(
'3.2.0',
task.node.driver_internal_info['agent_version'])