summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulia Kreger <juliaashleykreger@gmail.com>2021-12-01 14:20:35 -0800
committerJulia Kreger <juliaashleykreger@gmail.com>2021-12-08 17:22:22 +0000
commitbb929d508582dae01e6b807d7e534fe5b0d20a11 (patch)
tree7090a244b57aab58358433a3d18a62f52ee4506a
parent1dbec7340cd8fce808f38730def55b419b8d25c0 (diff)
downloadironic-bb929d508582dae01e6b807d7e534fe5b0d20a11.tar.gz
Remove redfish cache entry upon errors
Some transient errors can ultimately cause the client to need to be completely restarted due to cached connection data. Ironic now explicitly removes the cache entry when a sushy AccessError or python AttributeError is detected originating from the library. This will now result in the prior cached connection object to be discarded, and upon the next attempt to interact with the same node, a new connection will be launched. This will result in new sessions being created, but in all likelihood the prior session had already timed out or had been administratively removed. Sushy's code, as of https://review.opendev.org/c/openstack/sushy/+/820076 will raise SessionService lookup access errors as AccessErrors. Prior to that change, they should have been raised as AttributeError as the previous call sould have returned None to be used as an object. *Also* Includes follow-up change Ia59f774c9340e3a6fa63418afedf12098c709052 squashed into this change, and necessary backport friendly mock of AccessError which had changed since this release of ironic. Change-Id: Icc6e5dd74d9f15e679a7e764fe49238ed6b8dc1e Story: 2009719 Task: 44107 (cherry picked from commit 1439af27ba2bd31fb85369754c648a45ee9ca14b) (cherry picked from commit 01997c8418b9e6ade47437b09dd9412310b90eac) (cherry picked from commit e3e7deaf48a5315fe10350e8a87b2fcc4e189406)
-rw-r--r--ironic/drivers/modules/redfish/utils.py27
-rw-r--r--ironic/tests/unit/drivers/modules/redfish/test_utils.py72
-rw-r--r--ironic/tests/unit/drivers/third_party_driver_mocks.py2
-rw-r--r--releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml13
4 files changed, 113 insertions, 1 deletions
diff --git a/ironic/drivers/modules/redfish/utils.py b/ironic/drivers/modules/redfish/utils.py
index 607de4366..1dffe1a1a 100644
--- a/ironic/drivers/modules/redfish/utils.py
+++ b/ironic/drivers/modules/redfish/utils.py
@@ -240,6 +240,16 @@ class SessionCache(object):
# NOTE(etingof): perhaps this session token is no good
if isinstance(exc_val, sushy.exceptions.ConnectionError):
self.__class__._sessions.pop(self._session_key, None)
+ # NOTE(TheJulia): A hard access error has surfaced, we
+ # likely need to eliminate the session.
+ if isinstance(exc_val, sushy.exceptions.AccessError):
+ self.__class__._sessions.pop(self._session_key, None)
+ # NOTE(TheJulia): Something very bad has happened, such
+ # as the session is out of date, and refresh of the SessionService
+ # failed resulting in an AttributeError surfacing.
+ # https://storyboard.openstack.org/#!/story/2009719
+ if isinstance(exc_val, AttributeError):
+ self.__class__._sessions.pop(self._session_key, None)
@classmethod
def _expire_oldest_session(cls):
@@ -364,6 +374,23 @@ def _get_connection(node, lambda_fun, *args):
'auth_type': driver_info['auth_type'],
'node': node.uuid, 'error': e})
raise exception.RedfishConnectionError(node=node.uuid, error=e)
+ except sushy.exceptions.AccessError as e:
+ LOG.warning('For node %(node)s, we received an authentication '
+ 'access error from address %(address)s with auth_type '
+ '%(auth_type)s. The client will not be re-used upon '
+ 'the next re-attempt. Please ensure your using the '
+ 'correct credentials. Error: %(error)s',
+ {'address': driver_info['address'],
+ 'auth_type': driver_info['auth_type'],
+ 'node': node.uuid, 'error': e})
+ raise exception.RedfishError(node=node.uuid, error=e)
+ except AttributeError as e:
+ LOG.warning('For node %(node)s, we received at AttributeError '
+ 'when attempting to utilize the client. A new '
+ 'client session shall be used upon the next attempt.'
+ 'Attribute Error: %(error)s',
+ {'node': node.uuid, 'error': e})
+ raise exception.RedfishError(node=node.uuid, error=e)
try:
return _get_cached_connection(lambda_fun, *args)
diff --git a/ironic/tests/unit/drivers/modules/redfish/test_utils.py b/ironic/tests/unit/drivers/modules/redfish/test_utils.py
index 9bf59532f..ca8aba9da 100644
--- a/ironic/tests/unit/drivers/modules/redfish/test_utils.py
+++ b/ironic/tests/unit/drivers/modules/redfish/test_utils.py
@@ -426,8 +426,9 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase):
# Redfish specific configurations
self.config(connection_attempts=3, group='redfish')
- fake_conn = mock_sushy.return_value
+ fake_conn = mock.Mock()
fake_conn.get_system.side_effect = sushy.exceptions.ConnectionError()
+ mock_sushy.return_value = fake_conn
self.assertRaises(exception.RedfishConnectionError,
redfish_utils.get_system, self.node)
@@ -477,3 +478,72 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase):
redfish_utils.wait_until_get_system_ready, self.node)
self.assertEqual(fake_conn.get_system.call_count, 2)
+
+ @mock.patch.object(time, 'sleep', lambda seconds: None)
+ @mock.patch.object(sushy, 'Sushy', autospec=True)
+ @mock.patch('ironic.drivers.modules.redfish.utils.'
+ 'SessionCache._sessions', {})
+ def test_get_system_resource_access_error_retry(self, mock_sushy):
+
+ # Sushy access errors HTTP Errors
+ class fake_response(object):
+ status_code = 401
+ body = None
+
+ def json():
+ return {}
+
+ fake_conn = mock_sushy.return_value
+ fake_system = mock.Mock()
+ fake_conn.get_system.side_effect = iter(
+ [
+ sushy.exceptions.AccessError(
+ method='GET',
+ url='http://path/to/url',
+ response=fake_response),
+ fake_system,
+ ])
+
+ self.assertRaises(exception.RedfishError,
+ redfish_utils.get_system, self.node)
+ # Retry, as in next power sync perhaps
+ client = redfish_utils.get_system(self.node)
+ client('foo')
+
+ expected_get_system_calls = [
+ mock.call(self.parsed_driver_info['system_id']),
+ mock.call(self.parsed_driver_info['system_id']),
+ ]
+ fake_conn.get_system.assert_has_calls(expected_get_system_calls)
+ fake_system.assert_called_with('foo')
+ self.assertEqual(fake_conn.get_system.call_count, 2)
+
+ @mock.patch.object(time, 'sleep', lambda seconds: None)
+ @mock.patch.object(sushy, 'Sushy', autospec=True)
+ @mock.patch('ironic.drivers.modules.redfish.utils.'
+ 'SessionCache._sessions', {})
+ def test_get_system_resource_attribute_error(self, mock_sushy):
+
+ fake_conn = mock_sushy.return_value
+ fake_system = mock.Mock()
+ fake_conn.get_system.side_effect = iter(
+ [
+ AttributeError,
+ fake_system,
+ ])
+ # We need to check for AttributeError explicitly as
+ # otherwise we break existing tests if we try to catch
+ # it explicitly.
+ self.assertRaises(exception.RedfishError,
+ redfish_utils.get_system, self.node)
+ # Retry, as in next power sync perhaps
+ client = redfish_utils.get_system(self.node)
+ client('bar')
+ expected_get_system_calls = [
+ mock.call(self.parsed_driver_info['system_id']),
+ mock.call(self.parsed_driver_info['system_id']),
+ ]
+
+ fake_conn.get_system.assert_has_calls(expected_get_system_calls)
+ fake_system.assert_called_once_with('bar')
+ self.assertEqual(fake_conn.get_system.call_count, 2)
diff --git a/ironic/tests/unit/drivers/third_party_driver_mocks.py b/ironic/tests/unit/drivers/third_party_driver_mocks.py
index a7c7915d7..985bbc4c3 100644
--- a/ironic/tests/unit/drivers/third_party_driver_mocks.py
+++ b/ironic/tests/unit/drivers/third_party_driver_mocks.py
@@ -255,6 +255,8 @@ if not sushy:
type('SushyError', (MockKwargsException,), {}))
sushy.exceptions.ConnectionError = (
type('ConnectionError', (sushy.exceptions.SushyError,), {}))
+ sushy.exceptions.AccessError = (
+ type('AccessError', (sushy.exceptions.SushyError,), {}))
sushy.exceptions.ResourceNotFoundError = (
type('ResourceNotFoundError', (sushy.exceptions.SushyError,), {}))
sushy.exceptions.MissingAttributeError = (
diff --git a/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml b/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml
new file mode 100644
index 000000000..5bca75335
--- /dev/null
+++ b/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml
@@ -0,0 +1,13 @@
+---
+fixes:
+ - |
+ Fixes connection caching issues with Redfish BMCs where AccessErrors were
+ previously not disqualifying the cached connection from being re-used.
+ Ironic will now explicitly open a new connection instead of using the
+ previous connection in the cache. Under normal circumstances, the
+ ``sushy`` redfish library would detect and refresh sessions,
+ however a prior case exists where it may not detect a failure and contain
+ cached session credential data which is ultimately invalid, blocking
+ future access to the BMC via Redfish until the cache entry expired or
+ the ``ironic-conductor`` service was restarted. For more information
+ please see `story 2009719 <https://storyboard.openstack.org/#!/story/2009719>`_.