summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Patterson <cpatterson@microsoft.com>2023-03-01 17:14:02 -0500
committerGitHub <noreply@github.com>2023-03-01 16:14:02 -0600
commitd781e14cd86cd85900a3c289ae5671ec6e77916f (patch)
treefed211d711b88e9d0aa6271767c11aab36a5d388
parent5d1d2544ab1f070ab9810779ba914298d44c06b2 (diff)
downloadcloud-init-git-d781e14cd86cd85900a3c289ae5671ec6e77916f.tar.gz
sources/azure: fix regressions in IMDS behavior (#2041)
There are effectively two regressions in the recent IMDS refactor: 1. The metadata check len(imds_md["interface"]) in _check_if_nic_is_primary() is no longer correct as the refactor switched URLs and did not update this call to account for the fact that this metadata now lives under "network". 2. Network metadata was fetched with infinite=True and is now limited to ten retries. This callback had the twist of only allowing up to ten connection errors but otherwise would retry indefinitely. For _check_if_nic_is_primary(): - Drop the interface count check for _check_if_nic_is_primary(), we don't need it anyway. - Fix/update the unit test mocks that allowed the tests to pass, adding another test to verify max retries for http and connection errors. - Use 300 retries. We do not want to hit a case where we spin forever, but this should be more than enough time for IMDS to respond in the Savable PPS case (~5 minutes). For IMDS: - Consolidate IMDS retry handlers into a new ReadUrlRetryHandler class that supports the options required for each variant of request. - Minor tweaks to log and expand logging checks in unit tests. - Move all unit tests to mocking via mock_requests_session_request and replace mock_readurl fixture with wrapped_readurl to improve consistency between tests. Note that this change drops usage of `retry_on_url_exc`, which can probably be removed altogether as it is no longer used AFAICT. Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
-rw-r--r--cloudinit/sources/DataSourceAzure.py28
-rw-r--r--cloudinit/sources/azure/imds.py131
-rw-r--r--tests/unittests/sources/azure/test_imds.py282
-rw-r--r--tests/unittests/sources/test_azure.py93
4 files changed, 343 insertions, 191 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 4f804991..dfcb891f 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -679,9 +679,9 @@ class DataSourceAzure(sources.DataSource):
return crawled_data
@azure_ds_telemetry_reporter
- def get_metadata_from_imds(self) -> Dict:
+ def get_metadata_from_imds(self, retries: int = 10) -> Dict:
try:
- return imds.fetch_metadata_with_api_fallback()
+ return imds.fetch_metadata_with_api_fallback(retries=retries)
except (UrlError, ValueError) as error:
report_diagnostic_event(
"Ignoring IMDS metadata due to: %s" % error,
@@ -978,11 +978,8 @@ class DataSourceAzure(sources.DataSource):
raise BrokenAzureDataSource("Shutdown failure for PPS disk.")
@azure_ds_telemetry_reporter
- def _check_if_nic_is_primary(self, ifname):
- """Check if a given interface is the primary nic or not. If it is the
- primary nic, then we also get the expected total nic count from IMDS.
- IMDS will process the request and send a response only for primary NIC.
- """
+ def _check_if_nic_is_primary(self, ifname: str) -> bool:
+ """Check if a given interface is the primary nic or not."""
# For now, only a VM's primary NIC can contact IMDS and WireServer. If
# DHCP fails for a NIC, we have no mechanism to determine if the NIC is
# primary or secondary. In this case, retry DHCP until successful.
@@ -991,18 +988,11 @@ class DataSourceAzure(sources.DataSource):
# Primary nic detection will be optimized in the future. The fact that
# primary nic is being attached first helps here. Otherwise each nic
# could add several seconds of delay.
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(retries=300)
if imds_md:
# Only primary NIC will get a response from IMDS.
LOG.info("%s is the primary nic", ifname)
-
- # Set the expected nic count based on the response received.
- expected_nic_count = len(imds_md["interface"])
- report_diagnostic_event(
- "Expected nic count: %d" % expected_nic_count,
- logger_func=LOG.info,
- )
- return True, expected_nic_count
+ return True
# If we are not the primary nic, then clean the dhcp context.
LOG.warning(
@@ -1011,7 +1001,7 @@ class DataSourceAzure(sources.DataSource):
ifname,
)
self._teardown_ephemeral_networking()
- return False, -1
+ return False
@azure_ds_telemetry_reporter
def _wait_for_hot_attached_primary_nic(self, nl_sock):
@@ -1054,9 +1044,7 @@ class DataSourceAzure(sources.DataSource):
# won't be in primary_nic_found = false state for long.
if not primary_nic_found:
LOG.info("Checking if %s is the primary nic", ifname)
- primary_nic_found, _ = self._check_if_nic_is_primary(
- ifname
- )
+ primary_nic_found = self._check_if_nic_is_primary(ifname)
# Exit criteria: check if we've discovered primary nic
if primary_nic_found:
diff --git a/cloudinit/sources/azure/imds.py b/cloudinit/sources/azure/imds.py
index 54fc9a05..5e4046d0 100644
--- a/cloudinit/sources/azure/imds.py
+++ b/cloudinit/sources/azure/imds.py
@@ -2,7 +2,6 @@
#
# This file is part of cloud-init. See LICENSE file for license information.
-import functools
from typing import Dict
import requests
@@ -10,25 +9,69 @@ import requests
from cloudinit import log as logging
from cloudinit import util
from cloudinit.sources.helpers.azure import report_diagnostic_event
-from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc
+from cloudinit.url_helper import UrlError, readurl
LOG = logging.getLogger(__name__)
IMDS_URL = "http://169.254.169.254/metadata"
-_readurl_exception_callback = functools.partial(
- retry_on_url_exc,
- retry_codes=(
- 404, # not found (yet)
- 410, # gone / unavailable (yet)
- 429, # rate-limited/throttled
- 500, # server error
- ),
- retry_instances=(
- requests.ConnectionError,
- requests.Timeout,
- ),
-)
+
+class ReadUrlRetryHandler:
+ def __init__(
+ self,
+ *,
+ retry_codes=(
+ 404, # not found (yet)
+ 410, # gone / unavailable (yet)
+ 429, # rate-limited/throttled
+ 500, # server error
+ ),
+ max_connection_errors: int = 10,
+ logging_backoff: float = 1.0,
+ ) -> None:
+ self.logging_backoff = logging_backoff
+ self.max_connection_errors = max_connection_errors
+ self.retry_codes = retry_codes
+ self._logging_threshold = 1.0
+ self._request_count = 0
+
+ def exception_callback(self, req_args, exception) -> bool:
+ self._request_count += 1
+ if not isinstance(exception, UrlError):
+ report_diagnostic_event(
+ "Polling IMDS failed with unexpected exception: %r"
+ % (exception),
+ logger_func=LOG.warning,
+ )
+ return False
+
+ log = True
+ retry = True
+
+ # Check for connection errors which may occur early boot, but
+ # are otherwise indicative that we are not connecting with the
+ # primary NIC.
+ if isinstance(
+ exception.cause, (requests.ConnectionError, requests.Timeout)
+ ):
+ self.max_connection_errors -= 1
+ if self.max_connection_errors < 0:
+ retry = False
+ elif exception.code not in self.retry_codes:
+ retry = False
+
+ if self._request_count >= self._logging_threshold:
+ self._logging_threshold *= self.logging_backoff
+ else:
+ log = False
+
+ if log or not retry:
+ report_diagnostic_event(
+ "Polling IMDS failed attempt %d with exception: %r"
+ % (self._request_count, exception),
+ logger_func=LOG.info,
+ )
+ return retry
def _fetch_url(
@@ -38,11 +81,12 @@ def _fetch_url(
:raises UrlError: on error fetching metadata.
"""
+ handler = ReadUrlRetryHandler()
try:
response = readurl(
url,
- exception_cb=_readurl_exception_callback,
+ exception_cb=handler.exception_callback,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=log_response,
@@ -61,13 +105,14 @@ def _fetch_url(
def _fetch_metadata(
url: str,
+ retries: int = 10,
) -> Dict:
"""Fetch IMDS metadata.
:raises UrlError: on error fetching metadata.
:raises ValueError: on error parsing metadata.
"""
- metadata = _fetch_url(url)
+ metadata = _fetch_url(url, retries=retries)
try:
return util.load_json(metadata)
@@ -79,7 +124,7 @@ def _fetch_metadata(
raise
-def fetch_metadata_with_api_fallback() -> Dict:
+def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict:
"""Fetch extended metadata, falling back to non-extended as required.
:raises UrlError: on error fetching metadata.
@@ -87,7 +132,7 @@ def fetch_metadata_with_api_fallback() -> Dict:
"""
try:
url = IMDS_URL + "/instance?api-version=2021-08-01&extended=true"
- return _fetch_metadata(url)
+ return _fetch_metadata(url, retries=retries)
except UrlError as error:
if error.code == 400:
report_diagnostic_event(
@@ -95,7 +140,7 @@ def fetch_metadata_with_api_fallback() -> Dict:
logger_func=LOG.warning,
)
url = IMDS_URL + "/instance?api-version=2019-06-01"
- return _fetch_metadata(url)
+ return _fetch_metadata(url, retries=retries)
raise
@@ -106,43 +151,17 @@ def fetch_reprovision_data() -> bytes:
"""
url = IMDS_URL + "/reprovisiondata?api-version=2019-06-01"
- logging_threshold = 1
- poll_counter = 0
-
- def exception_callback(msg, exception):
- nonlocal logging_threshold
- nonlocal poll_counter
-
- poll_counter += 1
- if not isinstance(exception, UrlError):
- report_diagnostic_event(
- "Polling IMDS failed with unexpected exception: %r"
- % (exception),
- logger_func=LOG.warning,
- )
- return False
-
- log = True
- retry = False
- if exception.code in (404, 410):
- retry = True
- if poll_counter >= logging_threshold:
- # Exponential back-off on logging.
- logging_threshold *= 2
- else:
- log = False
-
- if log:
- report_diagnostic_event(
- "Polling IMDS failed with exception: %r count: %d"
- % (exception, poll_counter),
- logger_func=LOG.info,
- )
- return retry
-
+ handler = ReadUrlRetryHandler(
+ logging_backoff=2.0,
+ max_connection_errors=0,
+ retry_codes=(
+ 404,
+ 410,
+ ),
+ )
response = readurl(
url,
- exception_cb=exception_callback,
+ exception_cb=handler.exception_callback,
headers={"Metadata": "true"},
infinite=True,
log_req_resp=False,
@@ -150,7 +169,7 @@ def fetch_reprovision_data() -> bytes:
)
report_diagnostic_event(
- f"Polled IMDS {poll_counter+1} time(s)",
+ f"Polled IMDS {handler._request_count+1} time(s)",
logger_func=LOG.debug,
)
return response.contents
diff --git a/tests/unittests/sources/azure/test_imds.py b/tests/unittests/sources/azure/test_imds.py
index b5a72645..03f66502 100644
--- a/tests/unittests/sources/azure/test_imds.py
+++ b/tests/unittests/sources/azure/test_imds.py
@@ -3,20 +3,30 @@
import json
import logging
import math
+import re
from unittest import mock
import pytest
import requests
from cloudinit.sources.azure import imds
-from cloudinit.url_helper import UrlError
+from cloudinit.url_helper import UrlError, readurl
-MOCKPATH = "cloudinit.sources.azure.imds."
+LOG_PATH = "cloudinit.sources.azure.imds"
+MOCK_PATH = "cloudinit.sources.azure.imds."
+
+
+class StringMatch:
+ def __init__(self, regex) -> None:
+ self.regex = regex
+
+ def __eq__(self, other) -> bool:
+ return bool(re.match("^" + self.regex + "$", other))
@pytest.fixture
-def mock_readurl():
- with mock.patch(MOCKPATH + "readurl", autospec=True) as m:
+def wrapped_readurl():
+ with mock.patch.object(imds, "readurl", wraps=readurl) as m:
yield m
@@ -56,54 +66,63 @@ class TestFetchMetadataWithApiFallback:
def test_basic(
self,
caplog,
- mock_readurl,
+ mock_requests_session_request,
+ wrapped_readurl,
):
fake_md = {"foo": {"bar": []}}
- mock_readurl.side_effect = [
- mock.Mock(contents=json.dumps(fake_md).encode()),
+ mock_requests_session_request.side_effect = [
+ mock.Mock(content=json.dumps(fake_md)),
]
md = imds.fetch_metadata_with_api_fallback()
assert md == fake_md
- assert mock_readurl.mock_calls == [
+ assert wrapped_readurl.mock_calls == [
mock.call(
self.default_url,
timeout=self.timeout,
headers=self.headers,
retries=self.retries,
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
infinite=False,
log_req_resp=True,
- ),
+ )
]
-
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
+ assert caplog.record_tuples == [
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Read from.*"),
+ ),
]
- assert warnings == []
def test_basic_fallback(
self,
caplog,
- mock_readurl,
+ mock_requests_session_request,
+ wrapped_readurl,
):
fake_md = {"foo": {"bar": []}}
- mock_readurl.side_effect = [
+ mock_requests_session_request.side_effect = [
UrlError("No IMDS version", code=400),
- mock.Mock(contents=json.dumps(fake_md).encode()),
+ mock.Mock(content=json.dumps(fake_md)),
]
md = imds.fetch_metadata_with_api_fallback()
assert md == fake_md
- assert mock_readurl.mock_calls == [
+ assert wrapped_readurl.mock_calls == [
mock.call(
self.default_url,
timeout=self.timeout,
headers=self.headers,
retries=self.retries,
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
infinite=False,
log_req_resp=True,
),
@@ -112,18 +131,38 @@ class TestFetchMetadataWithApiFallback:
timeout=self.timeout,
headers=self.headers,
retries=self.retries,
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
infinite=False,
log_req_resp=True,
),
]
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
- ]
- assert warnings == [
- "Failed to fetch metadata from IMDS: No IMDS version",
- "Falling back to IMDS api-version: 2019-06-01",
+ assert caplog.record_tuples == [
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ LOG_PATH,
+ logging.WARNING,
+ "Failed to fetch metadata from IMDS: No IMDS version",
+ ),
+ (
+ LOG_PATH,
+ logging.WARNING,
+ "Falling back to IMDS api-version: 2019-06-01",
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Read from.*"),
+ ),
]
@pytest.mark.parametrize(
@@ -155,11 +194,36 @@ class TestFetchMetadataWithApiFallback:
assert md == fake_md
assert len(mock_requests_session_request.mock_calls) == 2
assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)]
-
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
+ assert caplog.record_tuples == [
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ LOG_PATH,
+ logging.INFO,
+ StringMatch(
+ "Polling IMDS failed attempt 1 with exception:"
+ f".*{error!s}.*"
+ ),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Please wait 1 second.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[1/11\] open.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Read from.*"),
+ ),
]
- assert warnings == []
def test_will_retry_errors_on_fallback(
self,
@@ -180,13 +244,58 @@ class TestFetchMetadataWithApiFallback:
assert md == fake_md
assert len(mock_requests_session_request.mock_calls) == 3
assert mock_url_helper_time_sleep.mock_calls == [mock.call(1)]
-
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
- ]
- assert warnings == [
- "Failed to fetch metadata from IMDS: fake error",
- "Falling back to IMDS api-version: 2019-06-01",
+ assert caplog.record_tuples == [
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ LOG_PATH,
+ logging.INFO,
+ StringMatch(
+ "Polling IMDS failed attempt 1 with exception:"
+ f".*{error!s}.*"
+ ),
+ ),
+ (
+ LOG_PATH,
+ logging.WARNING,
+ "Failed to fetch metadata from IMDS: fake error",
+ ),
+ (
+ LOG_PATH,
+ logging.WARNING,
+ "Falling back to IMDS api-version: 2019-06-01",
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ LOG_PATH,
+ logging.INFO,
+ StringMatch(
+ "Polling IMDS failed attempt 1 with exception:"
+ f".*{error!s}.*"
+ ),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Please wait 1 second.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[1/11\] open.*"),
+ ),
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch("Read from.*"),
+ ),
]
@pytest.mark.parametrize(
@@ -221,10 +330,24 @@ class TestFetchMetadataWithApiFallback:
== [mock.call(1)] * self.retries
)
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
+ logs = [x for x in caplog.record_tuples if x[0] == LOG_PATH]
+ assert logs == [
+ (
+ LOG_PATH,
+ logging.INFO,
+ StringMatch(
+ f"Polling IMDS failed attempt {i} with exception:"
+ f".*{error!s}.*"
+ ),
+ )
+ for i in range(1, 12)
+ ] + [
+ (
+ LOG_PATH,
+ logging.WARNING,
+ f"Failed to fetch metadata from IMDS: {error!s}",
+ )
]
- assert warnings == [f"Failed to fetch metadata from IMDS: {error!s}"]
@pytest.mark.parametrize(
"error",
@@ -253,30 +376,47 @@ class TestFetchMetadataWithApiFallback:
assert len(mock_requests_session_request.mock_calls) == 1
assert mock_url_helper_time_sleep.mock_calls == []
- warnings = [
- x.message for x in caplog.records if x.levelno == logging.WARNING
+ assert caplog.record_tuples == [
+ (
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"\[0/11\] open.*"),
+ ),
+ (
+ LOG_PATH,
+ logging.INFO,
+ StringMatch(
+ "Polling IMDS failed attempt 1 with exception:"
+ f".*{error!s}.*"
+ ),
+ ),
+ (
+ LOG_PATH,
+ logging.WARNING,
+ f"Failed to fetch metadata from IMDS: {error!s}",
+ ),
]
- assert warnings == [f"Failed to fetch metadata from IMDS: {error!s}"]
def test_non_json_repsonse(
self,
caplog,
- mock_readurl,
+ mock_requests_session_request,
+ wrapped_readurl,
):
- mock_readurl.side_effect = [
- mock.Mock(contents=b"bad data"),
+ mock_requests_session_request.side_effect = [
+ mock.Mock(content=b"bad data")
]
with pytest.raises(ValueError):
imds.fetch_metadata_with_api_fallback()
- assert mock_readurl.mock_calls == [
+ assert wrapped_readurl.mock_calls == [
mock.call(
self.default_url,
timeout=self.timeout,
headers=self.headers,
retries=self.retries,
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
infinite=False,
log_req_resp=True,
),
@@ -304,17 +444,18 @@ class TestFetchReprovisionData:
def test_basic(
self,
caplog,
- mock_readurl,
+ mock_requests_session_request,
+ wrapped_readurl,
):
content = b"ovf content"
- mock_readurl.side_effect = [
- mock.Mock(contents=content),
+ mock_requests_session_request.side_effect = [
+ mock.Mock(content=content),
]
ovf = imds.fetch_reprovision_data()
assert ovf == content
- assert mock_readurl.mock_calls == [
+ assert wrapped_readurl.mock_calls == [
mock.call(
self.url,
timeout=self.timeout,
@@ -327,10 +468,15 @@ class TestFetchReprovisionData:
assert caplog.record_tuples == [
(
- "cloudinit.sources.azure.imds",
+ "cloudinit.url_helper",
+ logging.DEBUG,
+ StringMatch(r"Read from.*"),
+ ),
+ (
+ LOG_PATH,
logging.DEBUG,
"Polled IMDS 1 time(s)",
- )
+ ),
]
@pytest.mark.parametrize(
@@ -370,10 +516,10 @@ class TestFetchReprovisionData:
)
backoff_logs = [
(
- "cloudinit.sources.azure.imds",
+ LOG_PATH,
logging.INFO,
- "Polling IMDS failed with exception: "
- f"{wrapped_error!r} count: {i}",
+ f"Polling IMDS failed attempt {i} with exception: "
+ f"{wrapped_error!r}",
)
for i in range(1, failures + 1)
if i == 1 or math.log2(i).is_integer()
@@ -382,10 +528,10 @@ class TestFetchReprovisionData:
(
"cloudinit.url_helper",
logging.DEBUG,
- mock.ANY,
+ StringMatch(r"Read from.*"),
),
(
- "cloudinit.sources.azure.imds",
+ LOG_PATH,
logging.DEBUG,
f"Polled IMDS {failures+1} time(s)",
),
@@ -437,20 +583,20 @@ class TestFetchReprovisionData:
backoff_logs = [
(
- "cloudinit.sources.azure.imds",
+ LOG_PATH,
logging.INFO,
- "Polling IMDS failed with exception: "
- f"{wrapped_error!r} count: {i}",
+ f"Polling IMDS failed attempt {i} with exception: "
+ f"{wrapped_error!r}",
)
for i in range(1, failures + 1)
if i == 1 or math.log2(i).is_integer()
]
assert caplog.record_tuples == backoff_logs + [
(
- "cloudinit.sources.azure.imds",
+ LOG_PATH,
logging.INFO,
- "Polling IMDS failed with exception: "
- f"{exc_info.value!r} count: {failures+1}",
+ f"Polling IMDS failed attempt {failures+1} with exception: "
+ f"{exc_info.value!r}",
),
]
@@ -483,9 +629,9 @@ class TestFetchReprovisionData:
assert caplog.record_tuples == [
(
- "cloudinit.sources.azure.imds",
+ LOG_PATH,
logging.INFO,
- "Polling IMDS failed with exception: "
- f"{exc_info.value!r} count: 1",
+ "Polling IMDS failed attempt 1 with exception: "
+ f"{exc_info.value!r}",
),
]
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index fe23b2e7..166cbe13 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -16,7 +16,6 @@ from cloudinit.net import dhcp
from cloudinit.sources import UNSET
from cloudinit.sources import DataSourceAzure as dsaz
from cloudinit.sources import InvalidMetaDataException
-from cloudinit.sources.azure import imds
from cloudinit.sources.helpers import netlink
from cloudinit.util import (
MountFailedError,
@@ -299,6 +298,15 @@ def patched_reported_ready_marker_path(azure_ds, patched_markers_dir_path):
yield reported_ready_marker
+def fake_http_error_for_code(status_code: int):
+ response_failure = requests.Response()
+ response_failure.status_code = status_code
+ return requests.exceptions.HTTPError(
+ "fake error",
+ response=response_failure,
+ )
+
+
def construct_ovf_env(
*,
custom_data=None,
@@ -2887,36 +2895,38 @@ class TestPreprovisioningHotAttachNics(CiTestCase):
"unknown-245": "624c3620",
}
- # Simulate two NICs by adding the same one twice.
- md = {
- "interface": [
- IMDS_NETWORK_METADATA["interface"][0],
- IMDS_NETWORK_METADATA["interface"][0],
- ]
- }
-
- m_req = mock.Mock(content=json.dumps(md))
- m_request.side_effect = [
- requests.Timeout("Fake connection timeout"),
- requests.ConnectionError("Fake Network Unreachable"),
- m_req,
- ]
+ m_req = mock.Mock(content=json.dumps({"not": "empty"}))
+ m_request.side_effect = (
+ [requests.Timeout("Fake connection timeout")] * 5
+ + [requests.ConnectionError("Fake Network Unreachable")] * 5
+ + 290 * [fake_http_error_for_code(410)]
+ + [m_req]
+ )
m_dhcpv4.return_value.lease = lease
- is_primary, expected_nic_count = dsa._check_if_nic_is_primary("eth0")
+ is_primary = dsa._check_if_nic_is_primary("eth0")
self.assertEqual(True, is_primary)
- self.assertEqual(2, expected_nic_count)
- assert len(m_request.mock_calls) == 3
+ assert len(m_request.mock_calls) == 301
- # Re-run tests to verify max retries.
+ # Re-run tests to verify max http failures.
+ m_request.reset_mock()
+ m_request.side_effect = 305 * [fake_http_error_for_code(410)]
+
+ dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
+
+ is_primary = dsa._check_if_nic_is_primary("eth1")
+ self.assertEqual(False, is_primary)
+ assert len(m_request.mock_calls) == 301
+
+ # Re-run tests to verify max connection error retries.
m_request.reset_mock()
m_request.side_effect = [
requests.Timeout("Fake connection timeout")
- ] * 6 + [requests.ConnectionError("Fake Network Unreachable")] * 6
+ ] * 9 + [requests.ConnectionError("Fake Network Unreachable")] * 9
dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
- is_primary, expected_nic_count = dsa._check_if_nic_is_primary("eth1")
+ is_primary = dsa._check_if_nic_is_primary("eth1")
self.assertEqual(False, is_primary)
assert len(m_request.mock_calls) == 11
@@ -3591,15 +3601,6 @@ class TestEphemeralNetworking:
assert azure_ds._ephemeral_dhcp_ctx is None
-def fake_http_error_for_code(status_code: int):
- response_failure = requests.Response()
- response_failure.status_code = status_code
- return requests.exceptions.HTTPError(
- "fake error",
- response=response_failure,
- )
-
-
class TestInstanceId:
def test_metadata(self, azure_ds, mock_dmi_read_dmi_data):
azure_ds.metadata = {"instance-id": "test-id"}
@@ -3704,7 +3705,7 @@ class TestProvisioning:
timeout=2,
headers={"Metadata": "true"},
retries=10,
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
infinite=False,
log_req_resp=True,
),
@@ -3763,7 +3764,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -3782,7 +3783,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -3852,9 +3853,7 @@ class TestProvisioning:
)
self.mock_readurl.side_effect = [
mock.MagicMock(contents=json.dumps(self.imds_md).encode()),
- mock.MagicMock(
- contents=json.dumps(self.imds_md["network"]).encode()
- ),
+ mock.MagicMock(contents=json.dumps(self.imds_md).encode()),
mock.MagicMock(contents=construct_ovf_env().encode()),
mock.MagicMock(contents=json.dumps(self.imds_md).encode()),
]
@@ -3866,7 +3865,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -3876,11 +3875,11 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
- retries=10,
+ retries=300,
timeout=2,
),
mock.call(
@@ -3895,7 +3894,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -4015,7 +4014,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -4025,11 +4024,11 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
- retries=10,
+ retries=300,
timeout=2,
),
mock.call(
@@ -4044,7 +4043,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -4122,7 +4121,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -4141,7 +4140,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,
@@ -4202,7 +4201,7 @@ class TestProvisioning:
mock.call(
"http://169.254.169.254/metadata/instance?"
"api-version=2021-08-01&extended=true",
- exception_cb=imds._readurl_exception_callback,
+ exception_cb=mock.ANY,
headers={"Metadata": "true"},
infinite=False,
log_req_resp=True,