diff options
author | Zuul <zuul@review.opendev.org> | 2020-08-07 16:32:30 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2020-08-07 16:32:30 +0000 |
commit | 9f88a0cb59b5a86a7e2a3ee760068e21cd6195b4 (patch) | |
tree | 96a815c224934ded77fc2fa5b4792c80a0d29e08 | |
parent | cda546783941f857318a4c0623a47810cf493f11 (diff) | |
parent | 5eab9bced63b2b9a6753cbbf594dda7ef9d03a3a (diff) | |
download | ironic-python-agent-9f88a0cb59b5a86a7e2a3ee760068e21cd6195b4.tar.gz |
Merge "Fix TypeError on agent lookup failure"
3 files changed, 84 insertions, 4 deletions
diff --git a/ironic_python_agent/ironic_api_client.py b/ironic_python_agent/ironic_api_client.py index 9d986929..0b1a5431 100644 --- a/ironic_python_agent/ironic_api_client.py +++ b/ironic_python_agent/ironic_api_client.py @@ -167,12 +167,41 @@ class APIClient(object): 'GET', self.lookup_api, headers=self._get_ironic_api_version_header(), params=params) - except Exception as err: - LOG.exception( - 'Unhandled error looking up node with addresses %r at %s: %s', - params['addresses'], self.api_url, err, + except (requests.exceptions.Timeout, + requests.exceptions.ConnectTimeout, + requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout, + requests.exceptions.HTTPError) as err: + LOG.warning( + 'Error detected while attempting to perform lookup ' + 'with %s, retrying. Error: %s', self.api_url, err ) return False + except Exception as err: + # NOTE(TheJulia): If you're looking here, and you're wondering + # why the retry logic is not working or your investigating a weird + # error or even IPA just exiting, + # See https://storyboard.openstack.org/#!/story/2007968 + # To be clear, we're going to try to provide as much detail as + # possible in the exit handling + msg = ('Unhandled error looking up node with addresses {} at ' + '{}: {}'.format(params['addresses'], self.api_url, err)) + # No matter what we do at this point, IPA is going to exit. + # This is because we don't know why the exception occured and + # we likely should not try to retry as such. + # We will attempt to provide as much detail to the logs as + # possible as to what occured, although depending on the logging + # subsystem, additional errors can occur, thus the additional + # handling below. + try: + LOG.exception(msg) + return False + except Exception as exc_err: + LOG.error(msg) + exc_msg = ('Unexpected exception occured while trying to ' + 'log additional detail. Error: {}'.format(exc_err)) + LOG.error(exc_msg) + raise errors.LookupNodeError(msg) if response.status_code != requests.codes.OK: LOG.warning( diff --git a/ironic_python_agent/tests/unit/test_ironic_api_client.py b/ironic_python_agent/tests/unit/test_ironic_api_client.py index c0f8bb74..63e100ff 100644 --- a/ironic_python_agent/tests/unit/test_ironic_api_client.py +++ b/ironic_python_agent/tests/unit/test_ironic_api_client.py @@ -16,6 +16,7 @@ from unittest import mock from oslo_serialization import jsonutils from oslo_service import loopingcall +import requests from ironic_python_agent import errors from ironic_python_agent import hardware @@ -312,6 +313,47 @@ class TestBaseIronicPythonAgent(base.IronicAgentTest): 'node_uuid': 'someuuid'}, params) + @mock.patch.object(ironic_api_client, 'LOG', autospec=True) + def test_do_lookup_transient_exceptions(self, mock_log): + exc_list = [requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout, + requests.exceptions.HTTPError, + requests.exceptions.Timeout, + requests.exceptions.ConnectTimeout] + self.api_client.session.request = mock.Mock() + for exc in exc_list: + self.api_client.session.request.reset_mock() + mock_log.reset_mock() + self.api_client.session.request.side_effect = exc + error = self.api_client._do_lookup(self.hardware_info, + node_uuid=None) + self.assertFalse(error) + mock_log.error.assert_has_calls([]) + self.assertEqual(1, mock_log.warning.call_count) + + @mock.patch.object(ironic_api_client, 'LOG', autospec=True) + def test_do_lookup_unknown_exception(self, mock_log): + self.api_client.session.request = mock.Mock() + self.api_client.session.request.side_effect = \ + requests.exceptions.RequestException('meow') + self.assertFalse( + self.api_client._do_lookup(self.hardware_info, + node_uuid=None)) + self.assertEqual(1, mock_log.exception.call_count) + + @mock.patch.object(ironic_api_client, 'LOG', autospec=True) + def test_do_lookup_unknown_exception_fallback(self, mock_log): + mock_log.exception.side_effect = TypeError + self.api_client.session.request = mock.Mock() + self.api_client.session.request.side_effect = \ + requests.exceptions.RequestException('meow') + self.assertRaises(errors.LookupNodeError, + self.api_client._do_lookup, + self.hardware_info, + node_uuid=None) + self.assertEqual(1, mock_log.exception.call_count) + self.assertEqual(2, mock_log.error.call_count) + def test_do_lookup_bad_response_code(self): response = FakeResponse(status_code=400, content={ 'node': { diff --git a/releasenotes/notes/fixes-agent-lookup-retries-1b4bb90b8e783aca.yaml b/releasenotes/notes/fixes-agent-lookup-retries-1b4bb90b8e783aca.yaml new file mode 100644 index 00000000..05776501 --- /dev/null +++ b/releasenotes/notes/fixes-agent-lookup-retries-1b4bb90b8e783aca.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes retry logic issues with the Agent Lookup which can result in + the lookup failing prematurely before being completed, typically + resulting in an abrupt end to the agent logging and potentially + weird errors like TypeError being reported on the agent process + standard error output. For more information see + `bug 2007968 <https://storyboard.openstack.org/#!/story/2007968>`_. |