diff options
author | Chris Patterson <cpatterson@microsoft.com> | 2023-04-19 21:54:04 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-19 20:54:04 -0500 |
commit | 4f3fe82305b709721a9725b25cbf0ca51b770fc6 (patch) | |
tree | bb28d3f1c3f8e071f6db7230e211ae19803b289d /tests | |
parent | 5942f4023e2581a43f31d547995095ca49954353 (diff) | |
download | cloud-init-git-4f3fe82305b709721a9725b25cbf0ca51b770fc6.tar.gz |
azure/imds: retry fetching metadata up to 300 seconds (#2121)
Instead of a fixed number of retries, allow up to 5 minutes to fetch
metadata from IMDS. The current approach allows for up to 11 attempts
depending on the path. Given the timeout setting, this can vary from
~11 seconds up to ~32 seconds depending on whether or not read/connection
timeouts are encountered.
Delaying boot on the rare occasion that IMDS is delayed is better than
ignoring the metadata as it ensures the VM is configured as expected.
This is a very conservative timeout and may be reduced in the future.
Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unittests/sources/azure/test_imds.py | 121 | ||||
-rw-r--r-- | tests/unittests/sources/test_azure.py | 70 |
2 files changed, 124 insertions, 67 deletions
diff --git a/tests/unittests/sources/azure/test_imds.py b/tests/unittests/sources/azure/test_imds.py index 9a8aad88..d8fdd563 100644 --- a/tests/unittests/sources/azure/test_imds.py +++ b/tests/unittests/sources/azure/test_imds.py @@ -36,6 +36,22 @@ def mock_requests_session_request(): yield m +@pytest.fixture(autouse=True) +def mock_time(): + with mock.patch.object(imds, "time", autospec=True) as m: + m.time_current = 0.0 + m.time_increment = 1.0 + + def fake_time(): + nonlocal m + current = m.time_current + m.time_current += m.time_increment + return current + + m.side_effect = fake_time + yield m + + @pytest.fixture def mock_url_helper_time_sleep(): with mock.patch("cloudinit.url_helper.time.sleep", autospec=True) as m: @@ -60,13 +76,14 @@ class TestFetchMetadataWithApiFallback: "http://169.254.169.254/metadata/instance?api-version=2019-06-01" ) headers = {"Metadata": "true"} - retries = 10 timeout = 2 + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_basic( self, caplog, mock_requests_session_request, + retry_deadline, wrapped_readurl, ): fake_md = {"foo": {"bar": []}} @@ -74,7 +91,9 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md assert wrapped_readurl.mock_calls == [ @@ -82,9 +101,8 @@ class TestFetchMetadataWithApiFallback: self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ) ] @@ -92,7 +110,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -101,10 +119,12 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_basic_fallback( self, caplog, mock_requests_session_request, + retry_deadline, wrapped_readurl, ): fake_md = {"foo": {"bar": []}} @@ -113,7 +133,9 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md assert wrapped_readurl.mock_calls == [ @@ -121,18 +143,16 @@ class TestFetchMetadataWithApiFallback: self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), mock.call( self.fallback_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] @@ -141,7 +161,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -156,7 +176,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -176,9 +196,12 @@ class TestFetchMetadataWithApiFallback: requests.Timeout("Fake connection timeout"), ], ) + @pytest.mark.parametrize("max_attempts,retry_deadline", [(2, 1.0)]) def test_will_retry_errors( self, caplog, + max_attempts, + retry_deadline, mock_requests_session_request, mock_url_helper_time_sleep, error, @@ -189,16 +212,18 @@ class TestFetchMetadataWithApiFallback: mock.Mock(content=json.dumps(fake_md)), ] - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md - assert len(mock_requests_session_request.mock_calls) == 2 + assert len(mock_requests_session_request.mock_calls) == max_attempts assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] assert caplog.record_tuples == [ ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -216,7 +241,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[1/11\] open.*"), + StringMatch(r"\[1/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -225,9 +250,11 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [3.0, 30.0]) def test_will_retry_errors_on_fallback( self, caplog, + retry_deadline, mock_requests_session_request, mock_url_helper_time_sleep, ): @@ -238,17 +265,20 @@ class TestFetchMetadataWithApiFallback: fake_http_error_for_code(429), mock.Mock(content=json.dumps(fake_md)), ] + max_attempts = len(mock_requests_session_request.side_effect) - md = imds.fetch_metadata_with_api_fallback() + md = imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert md == fake_md - assert len(mock_requests_session_request.mock_calls) == 3 + assert len(mock_requests_session_request.mock_calls) == max_attempts assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] assert caplog.record_tuples == [ ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -271,7 +301,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -289,7 +319,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[1/11\] open.*"), + StringMatch(r"\[1/infinite\] open.*"), ), ( "cloudinit.url_helper", @@ -309,25 +339,37 @@ class TestFetchMetadataWithApiFallback: requests.Timeout("Fake connection timeout"), ], ) + @pytest.mark.parametrize( + "max_attempts,retry_deadline", [(1, 0.0), (2, 1.0), (301, 300.0)] + ) def test_retry_until_failure( self, + error, + max_attempts, + retry_deadline, caplog, mock_requests_session_request, mock_url_helper_time_sleep, - error, ): - mock_requests_session_request.side_effect = [error] * (11) + mock_requests_session_request.side_effect = error with pytest.raises(UrlError) as exc_info: - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert exc_info.value.cause == error - assert len(mock_requests_session_request.mock_calls) == ( - self.retries + 1 + + # Connection errors max out at 11 attempts. + max_attempts = ( + 11 + if isinstance(error, requests.ConnectionError) + and max_attempts > 11 + else max_attempts ) - assert ( - mock_url_helper_time_sleep.mock_calls - == [mock.call(1)] * self.retries + assert len(mock_requests_session_request.mock_calls) == (max_attempts) + assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] * ( + max_attempts - 1 ) logs = [x for x in caplog.record_tuples if x[0] == LOG_PATH] @@ -340,7 +382,7 @@ class TestFetchMetadataWithApiFallback: f".*{error!s}.*" ), ) - for i in range(1, 12) + for i in range(1, max_attempts + 1) ] + [ ( LOG_PATH, @@ -356,12 +398,14 @@ class TestFetchMetadataWithApiFallback: fake_http_error_for_code(501), ], ) + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_will_not_retry_errors( self, + error, + retry_deadline, caplog, mock_requests_session_request, mock_url_helper_time_sleep, - error, ): fake_md = {"foo": {"bar": []}} mock_requests_session_request.side_effect = [ @@ -370,7 +414,9 @@ class TestFetchMetadataWithApiFallback: ] with pytest.raises(UrlError) as exc_info: - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert exc_info.value.cause == error assert len(mock_requests_session_request.mock_calls) == 1 @@ -380,7 +426,7 @@ class TestFetchMetadataWithApiFallback: ( "cloudinit.url_helper", logging.DEBUG, - StringMatch(r"\[0/11\] open.*"), + StringMatch(r"\[0/infinite\] open.*"), ), ( LOG_PATH, @@ -397,8 +443,10 @@ class TestFetchMetadataWithApiFallback: ), ] + @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0]) def test_non_json_repsonse( self, + retry_deadline, caplog, mock_requests_session_request, wrapped_readurl, @@ -408,16 +456,17 @@ class TestFetchMetadataWithApiFallback: ] with pytest.raises(ValueError): - imds.fetch_metadata_with_api_fallback() + imds.fetch_metadata_with_api_fallback( + retry_deadline=retry_deadline + ) assert wrapped_readurl.mock_calls == [ mock.call( self.default_url, timeout=self.timeout, headers=self.headers, - retries=self.retries, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 1c43aa66..b3f7b1bc 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -2805,8 +2805,10 @@ class TestPreprovisioningHotAttachNics(CiTestCase): @mock.patch("cloudinit.url_helper.time.sleep", autospec=True) @mock.patch("requests.Session.request", autospec=True) @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) + @mock.patch(MOCKPATH + "time", autospec=True) + @mock.patch("cloudinit.sources.azure.imds.time", autospec=True) def test_check_if_nic_is_primary_retries_on_failures( - self, m_dhcpv4, m_request, m_sleep + self, m_imds_time, m_time, m_dhcpv4, m_request, m_sleep ): """Retry polling for network metadata on all failures except timeout and network unreachable errors""" @@ -2822,27 +2824,45 @@ class TestPreprovisioningHotAttachNics(CiTestCase): } m_req = mock.Mock(content=json.dumps({"not": "empty"})) - m_request.side_effect = ( + errors = ( [requests.Timeout("Fake connection timeout")] * 5 + [requests.ConnectionError("Fake Network Unreachable")] * 5 - + 290 * [fake_http_error_for_code(410)] + + [fake_http_error_for_code(410)] * 5 + [m_req] ) + m_request.side_effect = errors m_dhcpv4.return_value.lease = lease + fake_time = 0.0 + fake_time_increment = 5.0 + + def fake_timer(): + nonlocal fake_time, fake_time_increment + fake_time += fake_time_increment + return fake_time + + m_time.side_effect = fake_timer + m_imds_time.side_effect = fake_timer is_primary = dsa._check_if_nic_is_primary("eth0") self.assertEqual(True, is_primary) - assert len(m_request.mock_calls) == 301 + assert len(m_request.mock_calls) == len(errors) # Re-run tests to verify max http failures. + attempts = 3 + fake_time = 0.0 + fake_time_increment = 300 / attempts + + m_time.side_effect = fake_timer + m_imds_time.side_effect = fake_timer m_request.reset_mock() - m_request.side_effect = 305 * [fake_http_error_for_code(410)] + errors = 100 * [fake_http_error_for_code(410)] + m_request.side_effect = errors dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) is_primary = dsa._check_if_nic_is_primary("eth1") self.assertEqual(False, is_primary) - assert len(m_request.mock_calls) == 301 + assert len(m_request.mock_calls) == attempts # Re-run tests to verify max connection error retries. m_request.reset_mock() @@ -2854,7 +2874,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase): is_primary = dsa._check_if_nic_is_primary("eth1") self.assertEqual(False, is_primary) - assert len(m_request.mock_calls) == 11 + assert len(m_request.mock_calls) == attempts @mock.patch("cloudinit.distros.networking.LinuxNetworking.try_set_link_up") def test_wait_for_link_up_returns_if_already_up(self, m_is_link_up): @@ -3531,9 +3551,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", timeout=2, headers={"Metadata": "true"}, - retries=10, exception_cb=mock.ANY, - infinite=False, + infinite=True, log_req_resp=True, ), ] @@ -3601,9 +3620,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3620,9 +3638,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3712,9 +3729,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3722,9 +3738,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=300, timeout=2, ), mock.call( @@ -3741,9 +3756,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3871,9 +3885,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -3881,9 +3894,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=300, timeout=2, ), mock.call( @@ -3900,9 +3912,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -3988,9 +3999,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), mock.call( @@ -4007,9 +4017,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] @@ -4108,9 +4117,8 @@ class TestProvisioning: "api-version=2021-08-01&extended=true", exception_cb=mock.ANY, headers={"Metadata": "true"}, - infinite=False, + infinite=True, log_req_resp=True, - retries=10, timeout=2, ), ] |