summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Patterson <cpatterson@microsoft.com>2023-05-12 03:21:14 -0700
committerGitHub <noreply@github.com>2023-05-12 12:21:14 +0200
commit9f8450368c2ae713de0a2308f5d3cb73de5b39f2 (patch)
treef143224ac7459a05f2e14e7aab4d90b91981caf0
parent60248d88f2d9e963a0bd0510180ec7b4d41cebd9 (diff)
downloadcloud-init-git-9f8450368c2ae713de0a2308f5d3cb73de5b39f2.tar.gz
azure/errors: introduce reportable errors for imds (#3647)
Always report failure to host, but report failure to fabric only outside of _check_if_nic_is_primary() which is expected to fail if nic is not primary. Add two types of reportable errors for IMDS metadata: - add ReportableErrorImdsUrlError() for url errors. - add ReportableErrorImdsMetadataParsingException() for parsing errors. Tweak ReportableError repr to be a bit friendlier. Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
-rw-r--r--cloudinit/sources/DataSourceAzure.py35
-rw-r--r--cloudinit/sources/azure/errors.py41
-rw-r--r--tests/unittests/sources/azure/test_errors.py65
-rw-r--r--tests/unittests/sources/test_azure.py65
4 files changed, 194 insertions, 12 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b8087406..dc8b2a21 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -543,7 +543,7 @@ class DataSourceAzure(sources.DataSource):
imds_md = {}
if self._is_ephemeral_networking_up():
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=True)
if not imds_md and ovf_source is None:
msg = "No OVF or IMDS available"
@@ -575,7 +575,7 @@ class DataSourceAzure(sources.DataSource):
md, userdata_raw, cfg, files = self._reprovision()
# fetch metadata again as it has changed after reprovisioning
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=True)
# Report errors if IMDS network configuration is missing data.
self.validate_imds_network_metadata(imds_md=imds_md)
@@ -667,18 +667,33 @@ class DataSourceAzure(sources.DataSource):
return crawled_data
@azure_ds_telemetry_reporter
- def get_metadata_from_imds(self) -> Dict:
- retry_deadline = time() + 300
+ def get_metadata_from_imds(self, report_failure: bool) -> Dict:
+ start_time = time()
+ retry_deadline = start_time + 300
+ error_string: Optional[str] = None
+ error_report: Optional[errors.ReportableError] = None
try:
return imds.fetch_metadata_with_api_fallback(
retry_deadline=retry_deadline
)
- except (UrlError, ValueError) as error:
- report_diagnostic_event(
- "Ignoring IMDS metadata due to: %s" % error,
- logger_func=LOG.warning,
+ except UrlError as error:
+ error_string = str(error)
+ duration = time() - start_time
+ error_report = errors.ReportableErrorImdsUrlError(
+ exception=error, duration=duration
)
- return {}
+ except ValueError as error:
+ error_string = str(error)
+ error_report = errors.ReportableErrorImdsMetadataParsingException(
+ exception=error
+ )
+
+ self._report_failure(error_report, host_only=not report_failure)
+ report_diagnostic_event(
+ "Ignoring IMDS metadata due to: %s" % error_string,
+ logger_func=LOG.warning,
+ )
+ return {}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached class attributes to defaults."""
@@ -976,7 +991,7 @@ class DataSourceAzure(sources.DataSource):
# Primary nic detection will be optimized in the future. The fact that
# primary nic is being attached first helps here. Otherwise each nic
# could add several seconds of delay.
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=False)
if imds_md:
# Only primary NIC will get a response from IMDS.
LOG.info("%s is the primary nic", ifname)
diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py
index ca902a03..966725b0 100644
--- a/cloudinit/sources/azure/errors.py
+++ b/cloudinit/sources/azure/errors.py
@@ -10,8 +10,11 @@ from datetime import datetime
from io import StringIO
from typing import Any, Dict, List, Optional
+import requests
+
from cloudinit import version
from cloudinit.sources.azure import identity
+from cloudinit.url_helper import UrlError
LOG = logging.getLogger(__name__)
@@ -81,7 +84,12 @@ class ReportableError(Exception):
)
def __repr__(self) -> str:
- return self.as_encoded_report()
+ return (
+ f"{self.__class__.__name__}("
+ f"reason={self.reason}, "
+ f"timestamp={self.timestamp}, "
+ f"supporting_data={self.supporting_data})"
+ )
class ReportableErrorDhcpInterfaceNotFound(ReportableError):
@@ -99,6 +107,37 @@ class ReportableErrorDhcpLease(ReportableError):
self.supporting_data["interface"] = interface
+class ReportableErrorImdsUrlError(ReportableError):
+ def __init__(self, *, exception: UrlError, duration: float) -> None:
+ # ConnectTimeout sub-classes ConnectError so order is important.
+ if isinstance(exception.cause, requests.ConnectTimeout):
+ reason = "connection timeout querying IMDS"
+ elif isinstance(exception.cause, requests.ConnectionError):
+ reason = "connection error querying IMDS"
+ elif isinstance(exception.cause, requests.ReadTimeout):
+ reason = "read timeout querying IMDS"
+ elif exception.code:
+ reason = "http error querying IMDS"
+ else:
+ reason = "unexpected error querying IMDS"
+
+ super().__init__(reason)
+
+ if exception.code:
+ self.supporting_data["http_code"] = exception.code
+
+ self.supporting_data["duration"] = duration
+ self.supporting_data["exception"] = repr(exception)
+ self.supporting_data["url"] = exception.url
+
+
+class ReportableErrorImdsMetadataParsingException(ReportableError):
+ def __init__(self, *, exception: ValueError) -> None:
+ super().__init__("error parsing IMDS metadata")
+
+ self.supporting_data["exception"] = repr(exception)
+
+
class ReportableErrorUnhandledException(ReportableError):
def __init__(self, exception: Exception) -> None:
super().__init__("unhandled exception")
diff --git a/tests/unittests/sources/azure/test_errors.py b/tests/unittests/sources/azure/test_errors.py
index e9d3e039..cf3e0e71 100644
--- a/tests/unittests/sources/azure/test_errors.py
+++ b/tests/unittests/sources/azure/test_errors.py
@@ -5,9 +5,11 @@ import datetime
from unittest import mock
import pytest
+import requests
from cloudinit import version
from cloudinit.sources.azure import errors
+from cloudinit.url_helper import UrlError
@pytest.fixture()
@@ -134,6 +136,69 @@ def test_dhcp_interface_not_found():
assert error.supporting_data["duration"] == 5.6
+@pytest.mark.parametrize(
+ "exception,reason",
+ [
+ (
+ UrlError(
+ requests.ConnectionError(),
+ ),
+ "connection error querying IMDS",
+ ),
+ (
+ UrlError(
+ requests.ConnectTimeout(),
+ ),
+ "connection timeout querying IMDS",
+ ),
+ (
+ UrlError(
+ requests.ReadTimeout(),
+ ),
+ "read timeout querying IMDS",
+ ),
+ (
+ UrlError(
+ Exception(),
+ code=500,
+ ),
+ "http error querying IMDS",
+ ),
+ (
+ UrlError(
+ requests.HTTPError(),
+ code=None,
+ ),
+ "unexpected error querying IMDS",
+ ),
+ ],
+)
+def test_imds_url_error(exception, reason):
+ duration = 123.4
+ fake_url = "fake://url"
+
+ exception.url = fake_url
+ error = errors.ReportableErrorImdsUrlError(
+ exception=exception, duration=duration
+ )
+
+ assert error.reason == reason
+ assert error.supporting_data["duration"] == duration
+ assert error.supporting_data["exception"] == repr(exception)
+ assert error.supporting_data["url"] == fake_url
+
+
+def test_imds_metadata_parsing_exception():
+ exception = ValueError("foobar")
+
+ error = errors.ReportableErrorImdsMetadataParsingException(
+ exception=exception
+ )
+
+ assert error.reason == "error parsing IMDS metadata"
+ assert error.supporting_data["exception"] == repr(exception)
+
+
def test_unhandled_exception():
source_error = None
try:
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index 59ac6459..ed605ee7 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -17,7 +17,7 @@ from cloudinit.reporting.handlers import HyperVKvpReportingHandler
from cloudinit.sources import UNSET
from cloudinit.sources import DataSourceAzure as dsaz
from cloudinit.sources import InvalidMetaDataException
-from cloudinit.sources.azure import errors, identity
+from cloudinit.sources.azure import errors, identity, imds
from cloudinit.sources.helpers import netlink
from cloudinit.util import (
MountFailedError,
@@ -136,6 +136,16 @@ def mock_ephemeral_dhcp_v4():
@pytest.fixture
+def mock_imds_fetch_metadata_with_api_fallback():
+ with mock.patch.object(
+ imds,
+ "fetch_metadata_with_api_fallback",
+ autospec=True,
+ ) as m:
+ yield m
+
+
+@pytest.fixture
def mock_kvp_report_failure_to_host():
with mock.patch(
MOCKPATH + "kvp.report_failure_to_host",
@@ -4244,6 +4254,59 @@ class TestProvisioning:
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1
+class TestGetMetadataFromImds:
+ @pytest.mark.parametrize("report_failure", [False, True])
+ @pytest.mark.parametrize(
+ "exception,reported_error_type",
+ [
+ (
+ url_helper.UrlError(requests.ConnectionError()),
+ errors.ReportableErrorImdsUrlError,
+ ),
+ (
+ ValueError("bad data"),
+ errors.ReportableErrorImdsMetadataParsingException,
+ ),
+ ],
+ )
+ def test_errors(
+ self,
+ azure_ds,
+ exception,
+ mock_azure_report_failure_to_fabric,
+ mock_imds_fetch_metadata_with_api_fallback,
+ mock_kvp_report_failure_to_host,
+ monkeypatch,
+ report_failure,
+ reported_error_type,
+ ):
+ monkeypatch.setattr(
+ azure_ds, "_is_ephemeral_networking_up", lambda: True
+ )
+ mock_imds_fetch_metadata_with_api_fallback.side_effect = exception
+
+ assert (
+ azure_ds.get_metadata_from_imds(report_failure=report_failure)
+ == {}
+ )
+ assert mock_imds_fetch_metadata_with_api_fallback.mock_calls == [
+ mock.call(retry_deadline=mock.ANY)
+ ]
+
+ reported_error = mock_kvp_report_failure_to_host.call_args[0][0]
+ assert isinstance(reported_error, reported_error_type)
+ assert reported_error.supporting_data["exception"] == repr(exception)
+ assert mock_kvp_report_failure_to_host.mock_calls == [
+ mock.call(reported_error)
+ ]
+ if report_failure:
+ assert mock_azure_report_failure_to_fabric.mock_calls == [
+ mock.call(endpoint=mock.ANY, error=reported_error)
+ ]
+ else:
+ assert mock_azure_report_failure_to_fabric.mock_calls == []
+
+
class TestReportFailure:
@pytest.mark.parametrize("kvp_enabled", [False, True])
def report_host_only_kvp_enabled(