summary | refs | log | tree | commit | diff
path: root/cloudinit
diff options
context:
space:
mode:
author: Chris Patterson <cpatterson@microsoft.com> 2023-05-12 03:21:14 -0700
committer: GitHub <noreply@github.com> 2023-05-12 12:21:14 +0200
commit: 9f8450368c2ae713de0a2308f5d3cb73de5b39f2 (patch)
tree: f143224ac7459a05f2e14e7aab4d90b91981caf0 /cloudinit
parent: 60248d88f2d9e963a0bd0510180ec7b4d41cebd9 (diff)
download: cloud-init-git-9f8450368c2ae713de0a2308f5d3cb73de5b39f2.tar.gz
azure/errors: introduce reportable errors for imds (#3647)
Always report failure to host, but report failure to fabric only outside of
_check_if_nic_is_primary(), which is expected to fail if nic is not primary.

Add two types of reportable errors for IMDS metadata:
- add ReportableErrorImdsUrlError() for url errors.
- add ReportableErrorImdsMetadataParsingException() for parsing errors.

Tweak ReportableError repr to be a bit friendlier.

Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
Diffstat (limited to 'cloudinit')
-rw-r--r--  cloudinit/sources/DataSourceAzure.py  35
-rw-r--r--  cloudinit/sources/azure/errors.py     41
2 files changed, 65 insertions, 11 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index b8087406..dc8b2a21 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -543,7 +543,7 @@ class DataSourceAzure(sources.DataSource):
imds_md = {}
if self._is_ephemeral_networking_up():
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=True)
if not imds_md and ovf_source is None:
msg = "No OVF or IMDS available"
@@ -575,7 +575,7 @@ class DataSourceAzure(sources.DataSource):
md, userdata_raw, cfg, files = self._reprovision()
# fetch metadata again as it has changed after reprovisioning
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=True)
# Report errors if IMDS network configuration is missing data.
self.validate_imds_network_metadata(imds_md=imds_md)
@@ -667,18 +667,33 @@ class DataSourceAzure(sources.DataSource):
return crawled_data
@azure_ds_telemetry_reporter
- def get_metadata_from_imds(self) -> Dict:
- retry_deadline = time() + 300
+ def get_metadata_from_imds(self, report_failure: bool) -> Dict:
+ start_time = time()
+ retry_deadline = start_time + 300
+ error_string: Optional[str] = None
+ error_report: Optional[errors.ReportableError] = None
try:
return imds.fetch_metadata_with_api_fallback(
retry_deadline=retry_deadline
)
- except (UrlError, ValueError) as error:
- report_diagnostic_event(
- "Ignoring IMDS metadata due to: %s" % error,
- logger_func=LOG.warning,
+ except UrlError as error:
+ error_string = str(error)
+ duration = time() - start_time
+ error_report = errors.ReportableErrorImdsUrlError(
+ exception=error, duration=duration
)
- return {}
+ except ValueError as error:
+ error_string = str(error)
+ error_report = errors.ReportableErrorImdsMetadataParsingException(
+ exception=error
+ )
+
+ self._report_failure(error_report, host_only=not report_failure)
+ report_diagnostic_event(
+ "Ignoring IMDS metadata due to: %s" % error_string,
+ logger_func=LOG.warning,
+ )
+ return {}
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached class attributes to defaults."""
@@ -976,7 +991,7 @@ class DataSourceAzure(sources.DataSource):
# Primary nic detection will be optimized in the future. The fact that
# primary nic is being attached first helps here. Otherwise each nic
# could add several seconds of delay.
- imds_md = self.get_metadata_from_imds()
+ imds_md = self.get_metadata_from_imds(report_failure=False)
if imds_md:
# Only primary NIC will get a response from IMDS.
LOG.info("%s is the primary nic", ifname)
diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py
index ca902a03..966725b0 100644
--- a/cloudinit/sources/azure/errors.py
+++ b/cloudinit/sources/azure/errors.py
@@ -10,8 +10,11 @@ from datetime import datetime
from io import StringIO
from typing import Any, Dict, List, Optional
+import requests
+
from cloudinit import version
from cloudinit.sources.azure import identity
+from cloudinit.url_helper import UrlError
LOG = logging.getLogger(__name__)
@@ -81,7 +84,12 @@ class ReportableError(Exception):
)
def __repr__(self) -> str:
- return self.as_encoded_report()
+ return (
+ f"{self.__class__.__name__}("
+ f"reason={self.reason}, "
+ f"timestamp={self.timestamp}, "
+ f"supporting_data={self.supporting_data})"
+ )
class ReportableErrorDhcpInterfaceNotFound(ReportableError):
@@ -99,6 +107,37 @@ class ReportableErrorDhcpLease(ReportableError):
self.supporting_data["interface"] = interface
+class ReportableErrorImdsUrlError(ReportableError):
+ def __init__(self, *, exception: UrlError, duration: float) -> None:
+ # ConnectTimeout sub-classes ConnectError so order is important.
+ if isinstance(exception.cause, requests.ConnectTimeout):
+ reason = "connection timeout querying IMDS"
+ elif isinstance(exception.cause, requests.ConnectionError):
+ reason = "connection error querying IMDS"
+ elif isinstance(exception.cause, requests.ReadTimeout):
+ reason = "read timeout querying IMDS"
+ elif exception.code:
+ reason = "http error querying IMDS"
+ else:
+ reason = "unexpected error querying IMDS"
+
+ super().__init__(reason)
+
+ if exception.code:
+ self.supporting_data["http_code"] = exception.code
+
+ self.supporting_data["duration"] = duration
+ self.supporting_data["exception"] = repr(exception)
+ self.supporting_data["url"] = exception.url
+
+
+class ReportableErrorImdsMetadataParsingException(ReportableError):
+ def __init__(self, *, exception: ValueError) -> None:
+ super().__init__("error parsing IMDS metadata")
+
+ self.supporting_data["exception"] = repr(exception)
+
+
class ReportableErrorUnhandledException(ReportableError):
def __init__(self, exception: Exception) -> None:
super().__init__("unhandled exception")