From ed3ebfc4f6c6c4f70a3be855165933c697469399 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Wed, 29 Mar 2023 08:24:59 -0400 Subject: sources/azure/imds: don't count timeout errors as connection errors (#2074) When fetching metadata in _check_if_nic_is_primary() the retry count is 300, but fails out after 10 connection errors. In some cases, fetching from IMDS may fail with read timeout for more than 10 attempts, far sooner than the desired 300. Keeping the existing max_connection_errors = 10 is fine so long as it is truly a connection error. These generally shouldn't occur when using the primary NIC. Always retry on timeout errors (up until desired limit) and count only connection errors against max_connection_errors. Signed-off-by: Chris Patterson --- cloudinit/sources/azure/imds.py | 9 +++++---- tests/unittests/sources/azure/test_imds.py | 1 - tests/unittests/sources/test_azure.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cloudinit/sources/azure/imds.py b/cloudinit/sources/azure/imds.py index 5e4046d0..1f5cf008 100644 --- a/cloudinit/sources/azure/imds.py +++ b/cloudinit/sources/azure/imds.py @@ -51,13 +51,14 @@ class ReadUrlRetryHandler: # Check for connection errors which may occur early boot, but # are otherwise indicative that we are not connecting with the # primary NIC. 
- if isinstance( - exception.cause, (requests.ConnectionError, requests.Timeout) - ): + if isinstance(exception.cause, requests.ConnectionError): self.max_connection_errors -= 1 if self.max_connection_errors < 0: retry = False - elif exception.code not in self.retry_codes: + elif ( + exception.code is not None + and exception.code not in self.retry_codes + ): retry = False if self._request_count >= self._logging_threshold: diff --git a/tests/unittests/sources/azure/test_imds.py b/tests/unittests/sources/azure/test_imds.py index 03f66502..9a8aad88 100644 --- a/tests/unittests/sources/azure/test_imds.py +++ b/tests/unittests/sources/azure/test_imds.py @@ -549,7 +549,6 @@ class TestFetchReprovisionData: "terminal_error", [ requests.ConnectionError("Fake connection error"), - requests.Timeout("Fake connection timeout"), ], ) def test_retry_until_failure( diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 9815c913..6c3e9281 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -2921,8 +2921,8 @@ class TestPreprovisioningHotAttachNics(CiTestCase): # Re-run tests to verify max connection error retries. m_request.reset_mock() m_request.side_effect = [ - requests.Timeout("Fake connection timeout") - ] * 9 + [requests.ConnectionError("Fake Network Unreachable")] * 9 + requests.ConnectionError("Fake Network Unreachable") + ] * 15 dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) -- cgit v1.2.1