diff options
-rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 9 | ||||
-rw-r--r-- | cloudinit/sources/azure/imds.py | 54 | ||||
-rw-r--r-- | tests/unittests/sources/azure/test_imds.py | 121 | ||||
-rw-r--r-- | tests/unittests/sources/test_azure.py | 70 |
4 files changed, 171 insertions, 83 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index a2b052b7..d0af2089 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -673,9 +673,12 @@ class DataSourceAzure(sources.DataSource): return crawled_data @azure_ds_telemetry_reporter - def get_metadata_from_imds(self, retries: int = 10) -> Dict: + def get_metadata_from_imds(self) -> Dict: + retry_deadline = time() + 300 try: - return imds.fetch_metadata_with_api_fallback(retries=retries) + return imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) except (UrlError, ValueError) as error: report_diagnostic_event( "Ignoring IMDS metadata due to: %s" % error, @@ -981,7 +984,7 @@ class DataSourceAzure(sources.DataSource): # Primary nic detection will be optimized in the future. The fact that # primary nic is being attached first helps here. Otherwise each nic # could add several seconds of delay. - imds_md = self.get_metadata_from_imds(retries=300) + imds_md = self.get_metadata_from_imds() if imds_md: # Only primary NIC will get a response from IMDS. LOG.info("%s is the primary nic", ifname) diff --git a/cloudinit/sources/azure/imds.py b/cloudinit/sources/azure/imds.py index 1f5cf008..f0b6911c 100644 --- a/cloudinit/sources/azure/imds.py +++ b/cloudinit/sources/azure/imds.py @@ -2,7 +2,8 @@ # # This file is part of cloud-init. See LICENSE file for license information. -from typing import Dict +from time import time +from typing import Dict, Optional import requests @@ -17,21 +18,31 @@ IMDS_URL = "http://169.254.169.254/metadata" class ReadUrlRetryHandler: + """Manager for readurl retry behavior using exception_callback(). + + :param logging_backoff: Backoff to limit logging. + :param max_connection_errors: Number of connection errors to retry on. + :param retry_codes: Set of http codes to retry on. + :param retry_deadline: Optional time()-based deadline to retry until. + """ + def __init__( self, *, + logging_backoff: float = 1.0, + max_connection_errors: int = 10, retry_codes=( 404, # not found (yet) 410, # gone / unavailable (yet) 429, # rate-limited/throttled 500, # server error ), - max_connection_errors: int = 10, - logging_backoff: float = 1.0, + retry_deadline: Optional[float] = None, ) -> None: self.logging_backoff = logging_backoff self.max_connection_errors = max_connection_errors self.retry_codes = retry_codes + self.retry_deadline = retry_deadline self._logging_threshold = 1.0 self._request_count = 0 @@ -46,7 +57,10 @@ class ReadUrlRetryHandler: return False log = True - retry = True + if self.retry_deadline is not None and time() >= self.retry_deadline: + retry = False + else: + retry = True # Check for connection errors which may occur early boot, but # are otherwise indicative that we are not connecting with the @@ -76,22 +90,30 @@ class ReadUrlRetryHandler: def _fetch_url( - url: str, *, log_response: bool = True, retries: int = 10, timeout: int = 2 + url: str, + *, + retry_deadline: float, + log_response: bool = True, + timeout: int = 2, ) -> bytes: """Fetch URL from IMDS. + :param url: url to fetch. + :param log_response: log responses in readurl(). + :param retry_deadline: time()-based deadline to retry until. + :param timeout: Read/connection timeout in seconds for readurl(). + :raises UrlError: on error fetching metadata. """ - handler = ReadUrlRetryHandler() + handler = ReadUrlRetryHandler(retry_deadline=retry_deadline) try: response = readurl( url, exception_cb=handler.exception_callback, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=log_response, - retries=retries, timeout=timeout, ) except UrlError as error: @@ -106,14 +128,17 @@ def _fetch_url( def _fetch_metadata( url: str, - retries: int = 10, + retry_deadline: float, ) -> Dict: """Fetch IMDS metadata. + :param url: url to fetch. + :param retry_deadline: time()-based deadline to retry until. + :raises UrlError: on error fetching metadata. :raises ValueError: on error parsing metadata. """ - metadata = _fetch_url(url, retries=retries) + metadata = _fetch_url(url, retry_deadline=retry_deadline) try: return util.load_json(metadata) @@ -125,15 +150,17 @@ def _fetch_metadata( raise -def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict: +def fetch_metadata_with_api_fallback(retry_deadline: float) -> Dict: """Fetch extended metadata, falling back to non-extended as required. + :param retry_deadline: time()-based deadline to retry until. + :raises UrlError: on error fetching metadata. :raises ValueError: on error parsing metadata. """ try: url = IMDS_URL + "/instance?api-version=2021-08-01&extended=true" - return _fetch_metadata(url, retries=retries) + return _fetch_metadata(url, retry_deadline=retry_deadline) except UrlError as error: if error.code == 400: report_diagnostic_event( @@ -141,7 +168,7 @@ def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict: logger_func=LOG.warning, ) url = IMDS_URL + "/instance?api-version=2019-06-01" - return _fetch_metadata(url, retries=retries) + return _fetch_metadata(url, retry_deadline=retry_deadline) raise @@ -159,6 +186,7 @@ def fetch_reprovision_data() -> bytes: 404, 410, ), + retry_deadline=None, ) response = readurl( url, diff --git a/tests/unittests/sources/azure/test_imds.py b/tests/unittests/sources/azure/test_imds.py index 9a8aad88..d8fdd563 100644 --- a/tests/unittests/sources/azure/test_imds.py +++ b/tests/unittests/sources/azure/test_imds.py @@ -36,6 +36,22 @@ def mock_requests_session_request(): yield m +@pytest.fixture(autouse=True) +def mock_time(): + with mock.patch.object(imds, "time", autospec=True) as m: + m.time_current = 0.0 + m.time_increment = 1.0 + + def fake_time(): + nonlocal m + current = m.time_current + m.time_current += m.time_increment + return current + + m.side_effect = fake_time + yield m + + @pytest.fixture def mock_url_helper_time_sleep(): with mock.patch("cloudinit.url_helper.time.sleep", autospec=True) as m: @@ -60,13 +76,14 @@ class TestFetchMetadataWithApiFallback: "http://169.254.169.254/metadata/instance?api-version=2019-06-01" ) headers = {"Metadata": "true"} - retries = 10 timeout = 2 + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_basic( self, caplog, mock_requests_session_request, + retry_deadline, wrapped_readurl, ): fake_md = {"foo": {"bar": []}} @@ -74,7 +91,9 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md assert wrapped_readurl.mock_calls == [ @@ -82,9 +101,8 @@ class TestFetchMetadataWithApiFallback: self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ) ] @@ -92,7 +110,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -101,10 +119,12 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_basic_fallback( self, caplog, mock_requests_session_request, + retry_deadline, wrapped_readurl, ): fake_md = {"foo": {"bar": []}} @@ -113,7 +133,9 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md assert wrapped_readurl.mock_calls == [ @@ -121,18 +143,16 @@ class TestFetchMetadataWithApiFallback: self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), mock.call( self.fallback_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] @@ -141,7 +161,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -156,7 +176,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -176,9 +196,12 @@ class TestFetchMetadataWithApiFallback: requests.Timeout("Fake connection timeout"), ], ) + @pytest.mark.parametrize("max_attempts,retry_deadline", [(2, 1.0)]) def test_will_retry_errors( self, caplog, + max_attempts, + retry_deadline, mock_requests_session_request, mock_url_helper_time_sleep, error, @@ -189,16 +212,18 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md - assert len(mock_requests_session_request.mock_calls) == 2 + assert len(mock_requests_session_request.mock_calls) == max_attempts assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] assert caplog.record_tuples == [ ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -216,7 +241,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[1/11\] open.*"), + StringMatch(r"\[1/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -225,9 +250,11 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [3.0, 30.0]) def test_will_retry_errors_on_fallback( self, caplog, + retry_deadline, mock_requests_session_request, mock_url_helper_time_sleep, ): @@ -238,17 +265,20 @@ class TestFetchMetadataWithApiFallback: fake_http_error_for_code(429), mock.Mock(content=json.dumps(fake_md)), ] + max_attempts = len(mock_requests_session_request.side_effect) - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md - assert len(mock_requests_session_request.mock_calls) == 3 + assert len(mock_requests_session_request.mock_calls) == max_attempts assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] assert caplog.record_tuples == [ ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -271,7 +301,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -289,7 +319,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[1/11\] open.*"), + StringMatch(r"\[1/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -309,25 +339,37 @@ class TestFetchMetadataWithApiFallback: requests.Timeout("Fake connection timeout"), ], ) + @pytest.mark.parametrize( + "max_attempts,retry_deadline", [(1, 0.0), (2, 1.0), (301, 300.0)] + ) def test_retry_until_failure( self, + error, + max_attempts, + retry_deadline, caplog, mock_requests_session_request, mock_url_helper_time_sleep, - error, ): - mock_requests_session_request.side_effect = [error] * (11) + mock_requests_session_request.side_effect = error with pytest.raises(UrlError) as exc_info: - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert exc_info.value.cause == error - assert len(mock_requests_session_request.mock_calls) == ( - self.retries + 1 + + # Connection errors max out at 11 attempts. + max_attempts = ( + 11 + if isinstance(error, requests.ConnectionError) + and max_attempts > 11 + else max_attempts ) - assert ( - mock_url_helper_time_sleep.mock_calls - == [mock.call(1)] * self.retries + assert len(mock_requests_session_request.mock_calls) == (max_attempts) + assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] * ( + max_attempts - 1 ) logs = [x for x in caplog.record_tuples if x[0] == LOG_PATH] @@ -340,7 +382,7 @@ class TestFetchMetadataWithApiFallback: f".*{error!s}.*" ), ) - for i in range(1, 12) + for i in range(1, max_attempts + 1) ] + [ ( LOG_PATH, @@ -356,12 +398,14 @@ class TestFetchMetadataWithApiFallback: fake_http_error_for_code(501), ], ) + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_will_not_retry_errors( self, + error, + retry_deadline, caplog, mock_requests_session_request, mock_url_helper_time_sleep, - error, ): fake_md = {"foo": {"bar": []}} mock_requests_session_request.side_effect = [ @@ -370,7 +414,9 @@ class TestFetchMetadataWithApiFallback: ] with pytest.raises(UrlError) as exc_info: - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert exc_info.value.cause == error assert len(mock_requests_session_request.mock_calls) == 1 @@ -380,7 +426,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -397,8 +443,10 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_non_json_repsonse( self, + retry_deadline, caplog, mock_requests_session_request, wrapped_readurl, @@ -408,16 +456,17 @@ class TestFetchMetadataWithApiFallback: ] with pytest.raises(ValueError): - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert wrapped_readurl.mock_calls == [ mock.call( self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 1c43aa66..b3f7b1bc 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -2805,8 +2805,10 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("cloudinit.url_helper.time.sleep", autospec=True) @mock.patch("requests.Session.request", autospec=True) @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) + @mock.patch(MOCKPATH + "time", autospec=True) + @mock.patch("cloudinit.sources.azure.imds.time", autospec=True) def test_check_if_nic_is_primary_retries_on_failures( - self, m_dhcpv4, m_request, m_sleep + self, m_imds_time, m_time, m_dhcpv4, m_request, m_sleep ): """Retry polling for network metadata on all failures except timeout and network unreachable errors""" @@ -2822,27 +2824,45 @@ class TestPreprovisioningHotAttachNics(CiTestCase): } m_req = mock.Mock(content=json.dumps({"not": "empty"})) - m_request.side_effect = ( + errors = ( [requests.Timeout("Fake connection timeout")] * 5 + [requests.ConnectionError("Fake Network Unreachable")] * 5 - + 290 * [fake_http_error_for_code(410)] + + [fake_http_error_for_code(410)] * 5 + [m_req] ) + m_request.side_effect = errors m_dhcpv4.return_value.lease = lease + fake_time = 0.0 + fake_time_increment = 5.0 + + def fake_timer(): + nonlocal fake_time, fake_time_increment + fake_time += fake_time_increment + return fake_time + + m_time.side_effect = fake_timer + m_imds_time.side_effect = fake_timer is_primary = dsa._check_if_nic_is_primary("eth0") self.assertEqual(True, is_primary) - assert len(m_request.mock_calls) == 301 + assert len(m_request.mock_calls) == len(errors) # Re-run tests to verify max http failures. + attempts = 3 + fake_time = 0.0 + fake_time_increment = 300 / attempts + + m_time.side_effect = fake_timer + m_imds_time.side_effect = fake_timer m_request.reset_mock() - m_request.side_effect = 305 * [fake_http_error_for_code(410)] + errors = 100 * [fake_http_error_for_code(410)] + m_request.side_effect = errors dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) is_primary = dsa._check_if_nic_is_primary("eth1") self.assertEqual(False, is_primary) - assert len(m_request.mock_calls) == 301 + assert len(m_request.mock_calls) == attempts # Re-run tests to verify max connection error retries. m_request.reset_mock() @@ -2854,7 +2874,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): is_primary = dsa._check_if_nic_is_primary("eth1") self.assertEqual(False, is_primary) - assert len(m_request.mock_calls) == 11 + assert len(m_request.mock_calls) == attempts @mock.patch("cloudinit.distros.networking.LinuxNetworking.try_set_link_up") def test_wait_for_link_up_returns_if_already_up(self, m_is_link_up): @@ -3531,9 +3551,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", timeout=2, headers={"Metadata": "true"}, - retries=10, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] @@ -3601,9 +3620,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3620,9 +3638,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3712,9 +3729,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3722,9 +3738,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=300, timeout=2, ), mock.call( @@ -3741,9 +3756,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3871,9 +3885,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3881,9 +3894,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=300, timeout=2, ), mock.call( @@ -3900,9 +3912,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3988,9 +3999,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -4007,9 +4017,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -4108,9 +4117,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] |