summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cloudinit/sources/DataSourceAzure.py9
-rw-r--r--cloudinit/sources/azure/imds.py54
-rw-r--r--tests/unittests/sources/azure/test_imds.py121
-rw-r--r--tests/unittests/sources/test_azure.py70
4 files changed, 171 insertions, 83 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index a2b052b7..d0af2089 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -673,9 +673,12 @@ class DataSourceAzure(sources.DataSource):
return crawled_data
@azure_ds_telemetry_reporter
- def get_metadata_from_imds(self, retries: int = 10) -> Dict:
+ def get_metadata_from_imds(self) -> Dict:
+ retry_deadline = time() + 300
try:
- return imds.fetch_metadata_with_api_fallback(retries=retries)
+ return imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
except (UrlError, ValueError) as error:
report_diagnostic_event(
"Ignoring IMDS metadata due to: %s" % error,
@@ -981,7 +984,7 @@ class DataSourceAzure(sources.DataSource):
# Primary nic detection will be optimized in the future. The fact that
# primary nic is being attached first helps here. Otherwise each nic
# could add several seconds of delay.
- imds_md = self.get_metadata_from_imds(retries=300)
+ imds_md = self.get_metadata_from_imds()
if imds_md:
# Only primary NIC will get a response from IMDS.
LOG.info("%s is the primary nic", ifname)
diff --git a/cloudinit/sources/azure/imds.py b/cloudinit/sources/azure/imds.py
index 1f5cf008..f0b6911c 100644
--- a/cloudinit/sources/azure/imds.py
+++ b/cloudinit/sources/azure/imds.py
@@ -2,7 +2,8 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
-from typing import Dict
+from time import time
+from typing import Dict, Optional
import requests
@@ -17,21 +18,31 @@ IMDS_URL = "http://169.254.169.254/metadata"
class ReadUrlRetryHandler:
+ """Manager for readurl retry behavior using exception_callback().
+
+ :param logging_backoff: Backoff to limit logging.
+ :param max_connection_errors: Number of connection errors to retry on.
+ :param retry_codes: Set of http codes to retry on.
+ :param retry_deadline: Optional time()-based deadline to retry until.
+ """
+
def __init__(
self,
*,
+ logging_backoff: float = 1.0,
+ max_connection_errors: int = 10,
retry_codes=(
404, # not found (yet)
410, # gone / unavailable (yet)
429, # rate-limited/throttled
500, # server error
),
- max_connection_errors: int = 10,
- logging_backoff: float = 1.0,
+ retry_deadline: Optional[float] = None,
) -> None:
self.logging_backoff = logging_backoff
self.max_connection_errors = max_connection_errors
self.retry_codes = retry_codes
+ self.retry_deadline = retry_deadline
self._logging_threshold = 1.0
self._request_count = 0
@@ -46,7 +57,10 @@ class ReadUrlRetryHandler:
return False
log = True
- retry = True
+ if self.retry_deadline is not None and time() >= self.retry_deadline:
+ retry = False
+ else:
+ retry = True
# Check for connection errors which may occur early boot, but
# are otherwise indicative that we are not connecting with the
@@ -76,22 +90,30 @@ class ReadUrlRetryHandler:
def _fetch_url(
- url: str, *, log_response: bool = True, retries: int = 10, timeout: int = 2
+ url: str,
+ *,
+ retry_deadline: float,
+ log_response: bool = True,
+ timeout: int = 2,
) -> bytes:
"""Fetch URL from IMDS.
+ :param url: url to fetch.
+ :param log_response: log responses in readurl().
+ :param retry_deadline: time()-based deadline to retry until.
+ :param timeout: Read/connection timeout in seconds for readurl().
+
:raises UrlError: on error fetching metadata.
"""
- handler = ReadUrlRetryHandler()
+ handler = ReadUrlRetryHandler(retry_deadline=retry_deadline)
try:
response = readurl(
url,
exception_cb=handler.exception_callback,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=log_response,
- retries=retries,
timeout=timeout,
)
except UrlError as error:
@@ -106,14 +128,17 @@ def _fetch_url(
def _fetch_metadata(
url: str,
- retries: int = 10,
+ retry_deadline: float,
) -> Dict:
"""Fetch IMDS metadata.
+ :param url: url to fetch.
+ :param retry_deadline: time()-based deadline to retry until.
+
:raises UrlError: on error fetching metadata.
:raises ValueError: on error parsing metadata.
"""
- metadata = _fetch_url(url, retries=retries)
+ metadata = _fetch_url(url, retry_deadline=retry_deadline)
try:
return util.load_json(metadata)
@@ -125,15 +150,17 @@ def _fetch_metadata(
raise
-def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict:
+def fetch_metadata_with_api_fallback(retry_deadline: float) -> Dict:
"""Fetch extended metadata, falling back to non-extended as required.
+ :param retry_deadline: time()-based deadline to retry until.
+
:raises UrlError: on error fetching metadata.
:raises ValueError: on error parsing metadata.
"""
try:
url = IMDS_URL + "/instance?api-version=2021-08-01&extended=true"
- return _fetch_metadata(url, retries=retries)
+ return _fetch_metadata(url, retry_deadline=retry_deadline)
except UrlError as error:
if error.code == 400:
report_diagnostic_event(
@@ -141,7 +168,7 @@ def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict:
logger_func=LOG.warning,
)
url = IMDS_URL + "/instance?api-version=2019-06-01"
- return _fetch_metadata(url, retries=retries)
+ return _fetch_metadata(url, retry_deadline=retry_deadline)
raise
@@ -159,6 +186,7 @@ def fetch_reprovision_data() -> bytes:
404,
410,
),
+ retry_deadline=None,
)
response = readurl(
url,
diff --git a/tests/unittests/sources/azure/test_imds.py b/tests/unittests/sources/azure/test_imds.py
index 9a8aad88..d8fdd563 100644
--- a/tests/unittests/sources/azure/test_imds.py
+++ b/tests/unittests/sources/azure/test_imds.py
@@ -36,6 +36,22 @@ def mock_requests_session_request():
yield m
+@pytest.fixture(autouse=True)
+def mock_time():
+ with mock.patch.object(imds, "time", autospec=True) as m:
+ m.time_current = 0.0
+ m.time_increment = 1.0
+
+ def fake_time():
+ nonlocal m
+ current = m.time_current
+ m.time_current += m.time_increment
+ return current
+
+ m.side_effect = fake_time
+ yield m
+
+
@pytest.fixture
def mock_url_helper_time_sleep():
with mock.patch("cloudinit.url_helper.time.sleep", autospec=True) as m:
@@ -60,13 +76,14 @@ class TestFetchMetadataWithApiFallback:
"http://169.254.169.254/metadata/instance?api-version=2019-06-01"
)
headers = {"Metadata": "true"}
- retries = 10
timeout = 2
+ @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0])
def test_basic(
self,
caplog,
mock_requests_session_request,
+ retry_deadline,
wrapped_readurl,
):
fake_md = {"foo": {"bar": []}}
@@ -74,7 +91,9 @@ class TestFetchMetadataWithApiFallback:
mock.Mock(content=json.dumps(fake_md)),
]
- md = imds.fetch_metadata_with_api_fallback()
+ md = imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert md == fake_md
assert wrapped_readurl.mock_calls == [
@@ -82,9 +101,8 @@ class TestFetchMetadataWithApiFallback:
self.default_url,
timeout=self.timeout,
headers=self.headers,
- retries=self.retries,
exception_cb=mock.ANY,
- infinite=False,
+ infinite=True,
log_req_resp=True,
)
]
@@ -92,7 +110,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
"cloudinit.url_helper",
@@ -101,10 +119,12 @@ class TestFetchMetadataWithApiFallback:
),
]
+ @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0])
def test_basic_fallback(
self,
caplog,
mock_requests_session_request,
+ retry_deadline,
wrapped_readurl,
):
fake_md = {"foo": {"bar": []}}
@@ -113,7 +133,9 @@ class TestFetchMetadataWithApiFallback:
mock.Mock(content=json.dumps(fake_md)),
]
- md = imds.fetch_metadata_with_api_fallback()
+ md = imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert md == fake_md
assert wrapped_readurl.mock_calls == [
@@ -121,18 +143,16 @@ class TestFetchMetadataWithApiFallback:
self.default_url,
timeout=self.timeout,
headers=self.headers,
- retries=self.retries,
exception_cb=mock.ANY,
- infinite=False,
+ infinite=True,
log_req_resp=True,
),
mock.call(
self.fallback_url,
timeout=self.timeout,
headers=self.headers,
- retries=self.retries,
exception_cb=mock.ANY,
- infinite=False,
+ infinite=True,
log_req_resp=True,
),
]
@@ -141,7 +161,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
LOG_PATH,
@@ -156,7 +176,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
"cloudinit.url_helper",
@@ -176,9 +196,12 @@ class TestFetchMetadataWithApiFallback:
requests.Timeout("Fake connection timeout"),
],
)
+ @pytest.mark.parametrize("max_attempts,retry_deadline", [(2, 1.0)])
def test_will_retry_errors(
self,
caplog,
+ max_attempts,
+ retry_deadline,
mock_requests_session_request,
mock_url_helper_time_sleep,
error,
@@ -189,16 +212,18 @@ class TestFetchMetadataWithApiFallback:
mock.Mock(content=json.dumps(fake_md)),
]
- md = imds.fetch_metadata_with_api_fallback()
+ md = imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert md == fake_md
- assert len(mock_requests_session_request.mock_calls) == 2
+ assert len(mock_requests_session_request.mock_calls) == max_attempts
assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)]
assert caplog.record_tuples == [
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
LOG_PATH,
@@ -216,7 +241,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[1/11\] open.*"),
+ StringMatch(r"\[1/infinite\] open.*"),
),
(
"cloudinit.url_helper",
@@ -225,9 +250,11 @@ class TestFetchMetadataWithApiFallback:
),
]
+ @pytest.mark.parametrize("retry_deadline", [3.0, 30.0])
def test_will_retry_errors_on_fallback(
self,
caplog,
+ retry_deadline,
mock_requests_session_request,
mock_url_helper_time_sleep,
):
@@ -238,17 +265,20 @@ class TestFetchMetadataWithApiFallback:
fake_http_error_for_code(429),
mock.Mock(content=json.dumps(fake_md)),
]
+ max_attempts = len(mock_requests_session_request.side_effect)
- md = imds.fetch_metadata_with_api_fallback()
+ md = imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert md == fake_md
- assert len(mock_requests_session_request.mock_calls) == 3
+ assert len(mock_requests_session_request.mock_calls) == max_attempts
assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)]
assert caplog.record_tuples == [
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
LOG_PATH,
@@ -271,7 +301,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
LOG_PATH,
@@ -289,7 +319,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[1/11\] open.*"),
+ StringMatch(r"\[1/infinite\] open.*"),
),
(
"cloudinit.url_helper",
@@ -309,25 +339,37 @@ class TestFetchMetadataWithApiFallback:
requests.Timeout("Fake connection timeout"),
],
)
+ @pytest.mark.parametrize(
+ "max_attempts,retry_deadline", [(1, 0.0), (2, 1.0), (301, 300.0)]
+ )
def test_retry_until_failure(
self,
+ error,
+ max_attempts,
+ retry_deadline,
caplog,
mock_requests_session_request,
mock_url_helper_time_sleep,
- error,
):
- mock_requests_session_request.side_effect = [error] * (11)
+ mock_requests_session_request.side_effect = error
with pytest.raises(UrlError) as exc_info:
- imds.fetch_metadata_with_api_fallback()
+ imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert exc_info.value.cause == error
- assert len(mock_requests_session_request.mock_calls) == (
- self.retries + 1
+
+ # Connection errors max out at 11 attempts.
+ max_attempts = (
+ 11
+ if isinstance(error, requests.ConnectionError)
+ and max_attempts > 11
+ else max_attempts
)
- assert (
- mock_url_helper_time_sleep.mock_calls
- == [mock.call(1)] * self.retries
+ assert len(mock_requests_session_request.mock_calls) == (max_attempts)
+ assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)] * (
+ max_attempts - 1
)
logs = [x for x in caplog.record_tuples if x[0] == LOG_PATH]
@@ -340,7 +382,7 @@ class TestFetchMetadataWithApiFallback:
f".*{error!s}.*"
),
)
- for i in range(1, 12)
+ for i in range(1, max_attempts + 1)
] + [
(
LOG_PATH,
@@ -356,12 +398,14 @@ class TestFetchMetadataWithApiFallback:
fake_http_error_for_code(501),
],
)
+ @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0])
def test_will_not_retry_errors(
self,
+ error,
+ retry_deadline,
caplog,
mock_requests_session_request,
mock_url_helper_time_sleep,
- error,
):
fake_md = {"foo": {"bar": []}}
mock_requests_session_request.side_effect = [
@@ -370,7 +414,9 @@ class TestFetchMetadataWithApiFallback:
]
with pytest.raises(UrlError) as exc_info:
- imds.fetch_metadata_with_api_fallback()
+ imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert exc_info.value.cause == error
assert len(mock_requests_session_request.mock_calls) == 1
@@ -380,7 +426,7 @@ class TestFetchMetadataWithApiFallback:
(
"cloudinit.url_helper",
logging.DEBUG,
- StringMatch(r"\[0/11\] open.*"),
+ StringMatch(r"\[0/infinite\] open.*"),
),
(
LOG_PATH,
@@ -397,8 +443,10 @@ class TestFetchMetadataWithApiFallback:
),
]
+ @pytest.mark.parametrize("retry_deadline", [0.0, 1.0, 60.0])
def test_non_json_repsonse(
self,
+ retry_deadline,
caplog,
mock_requests_session_request,
wrapped_readurl,
@@ -408,16 +456,17 @@ class TestFetchMetadataWithApiFallback:
]
with pytest.raises(ValueError):
- imds.fetch_metadata_with_api_fallback()
+ imds.fetch_metadata_with_api_fallback(
+ retry_deadline=retry_deadline
+ )
assert wrapped_readurl.mock_calls == [
mock.call(
self.default_url,
timeout=self.timeout,
headers=self.headers,
- retries=self.retries,
exception_cb=mock.ANY,
- infinite=False,
+ infinite=True,
log_req_resp=True,
),
]
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index 1c43aa66..b3f7b1bc 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -2805,8 +2805,10 @@ class TestPreprovisioningHotAttachNics(CiTestCase):
@mock.patch("cloudinit.url_helper.time.sleep", autospec=True)
@mock.patch("requests.Session.request", autospec=True)
@mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True)
+ @mock.patch(MOCKPATH + "time", autospec=True)
+ @mock.patch("cloudinit.sources.azure.imds.time", autospec=True)
def test_check_if_nic_is_primary_retries_on_failures(
- self, m_dhcpv4, m_request, m_sleep
+ self, m_imds_time, m_time, m_dhcpv4, m_request, m_sleep
):
"""Retry polling for network metadata on all failures except timeout
and network unreachable errors"""
@@ -2822,27 +2824,45 @@ class TestPreprovisioningHotAttachNics(CiTestCase):
}
m_req = mock.Mock(content=json.dumps({"not": "empty"}))
- m_request.side_effect = (
+ errors = (
[requests.Timeout("Fake connection timeout")] * 5
+ [requests.ConnectionError("Fake Network Unreachable")] * 5
- + 290 * [fake_http_error_for_code(410)]
+ + [fake_http_error_for_code(410)] * 5
+ [m_req]
)
+ m_request.side_effect = errors
m_dhcpv4.return_value.lease = lease
+ fake_time = 0.0
+ fake_time_increment = 5.0
+
+ def fake_timer():
+ nonlocal fake_time, fake_time_increment
+ fake_time += fake_time_increment
+ return fake_time
+
+ m_time.side_effect = fake_timer
+ m_imds_time.side_effect = fake_timer
is_primary = dsa._check_if_nic_is_primary("eth0")
self.assertEqual(True, is_primary)
- assert len(m_request.mock_calls) == 301
+ assert len(m_request.mock_calls) == len(errors)
# Re-run tests to verify max http failures.
+ attempts = 3
+ fake_time = 0.0
+ fake_time_increment = 300 / attempts
+
+ m_time.side_effect = fake_timer
+ m_imds_time.side_effect = fake_timer
m_request.reset_mock()
- m_request.side_effect = 305 * [fake_http_error_for_code(410)]
+ errors = 100 * [fake_http_error_for_code(410)]
+ m_request.side_effect = errors
dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
is_primary = dsa._check_if_nic_is_primary("eth1")
self.assertEqual(False, is_primary)
- assert len(m_request.mock_calls) == 301
+ assert len(m_request.mock_calls) == attempts
# Re-run tests to verify max connection error retries.
m_request.reset_mock()
@@ -2854,7 +2874,7 @@ class TestPreprovisioningHotAttachNics(CiTestCase):
is_primary = dsa._check_if_nic_is_primary("eth1")
self.assertEqual(False, is_primary)
- assert len(m_request.mock_calls) == 11
+ assert len(m_request.mock_calls) == attempts
@mock.patch("cloudinit.distros.networking.LinuxNetworking.try_set_link_up")
def test_wait_for_link_up_returns_if_already_up(self, m_is_link_up):
@@ -3531,9 +3551,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
timeout=2,
headers={"Metadata": "true"},
- retries=10,
exception_cb=mock.ANY,
- infinite=False,
+ infinite=True,
log_req_resp=True,
),
]
@@ -3601,9 +3620,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
mock.call(
@@ -3620,9 +3638,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
]
@@ -3712,9 +3729,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
mock.call(
@@ -3722,9 +3738,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=300,
timeout=2,
),
mock.call(
@@ -3741,9 +3756,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
]
@@ -3871,9 +3885,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
mock.call(
@@ -3881,9 +3894,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=300,
timeout=2,
),
mock.call(
@@ -3900,9 +3912,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
]
@@ -3988,9 +3999,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
mock.call(
@@ -4007,9 +4017,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
]
@@ -4108,9 +4117,8 @@ class TestProvisioning:
"api-version=2021-08-01&extended=true",
exception_cb=mock.ANY,
headers={"Metadata": "true"},
- infinite=False,
+ infinite=True,
log_req_resp=True,
- retries=10,
timeout=2,
),
]