summary | refs | log | tree | commit | diff
path: root/cloudinit
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit')
-rw-r--r-- cloudinit/sources/DataSourceAzure.py | 28
-rw-r--r-- cloudinit/sources/azure/errors.py | 93
-rw-r--r-- cloudinit/sources/helpers/azure.py | 15
3 files changed, 118 insertions, 18 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index aeec6a92..83dbdce1 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -26,7 +26,7 @@ from cloudinit.net.dhcp import (
)
from cloudinit.net.ephemeral import EphemeralDHCPv4
from cloudinit.reporting import events
-from cloudinit.sources.azure import identity, imds
+from cloudinit.sources.azure import errors, identity, imds
from cloudinit.sources.helpers import netlink
from cloudinit.sources.helpers.azure import (
DEFAULT_WIRESERVER_ENDPOINT,
@@ -727,11 +727,12 @@ class DataSourceAzure(sources.DataSource):
msg="Crawl of metadata service",
func=self.crawl_metadata,
)
- except Exception as e:
- report_diagnostic_event(
- "Could not crawl Azure metadata: %s" % e, logger_func=LOG.error
- )
- self._report_failure()
+ except errors.ReportableError as error:
+ self._report_failure(error)
+ return False
+ except Exception as error:
+ reportable_error = errors.ReportableErrorUnhandledException(error)
+ self._report_failure(reportable_error)
return False
finally:
self._teardown_ephemeral_networking()
@@ -1170,12 +1171,17 @@ class DataSourceAzure(sources.DataSource):
return reprovision_data
@azure_ds_telemetry_reporter
- def _report_failure(self) -> bool:
+ def _report_failure(self, error: errors.ReportableError) -> bool:
"""Tells the Azure fabric that provisioning has failed.
@param description: A description of the error encountered.
@return: The success status of sending the failure signal.
"""
+ report_diagnostic_event(
+ f"Azure datasource failure occurred: {error.as_description()}",
+ logger_func=LOG.error,
+ )
+
if self._is_ephemeral_networking_up():
try:
report_diagnostic_event(
@@ -1183,7 +1189,9 @@ class DataSourceAzure(sources.DataSource):
"to report failure to Azure",
logger_func=LOG.debug,
)
- report_failure_to_fabric(endpoint=self._wireserver_endpoint)
+ report_failure_to_fabric(
+ endpoint=self._wireserver_endpoint, error=error
+ )
return True
except Exception as e:
report_diagnostic_event(
@@ -1203,7 +1211,9 @@ class DataSourceAzure(sources.DataSource):
except NoDHCPLeaseError:
# Reporting failure will fail, but it will emit telemetry.
pass
- report_failure_to_fabric(endpoint=self._wireserver_endpoint)
+ report_failure_to_fabric(
+ endpoint=self._wireserver_endpoint, error=error
+ )
return True
except Exception as e:
report_diagnostic_event(
diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py
new file mode 100644
index 00000000..0dd426a7
--- /dev/null
+++ b/cloudinit/sources/azure/errors.py
@@ -0,0 +1,93 @@
+# Copyright (C) 2022 Microsoft Corporation.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import base64
+import csv
+import logging
+import traceback
+from datetime import datetime
+from io import StringIO
+from typing import Any, Dict, Optional
+
+from cloudinit import version
+from cloudinit.sources.azure import identity
+
+LOG = logging.getLogger(__name__)
+
+
+class ReportableError(Exception):
+    """Provisioning failure that can be rendered as a one-line description.
+
+    Captures the failure reason, optional supporting key/value data, the
+    cloud-init agent string, the VM id and a creation timestamp, and
+    renders them as a single delimited line via as_description().
+    """
+
+    def __init__(
+        self,
+        reason: str,
+        *,
+        supporting_data: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Record the failure details at the moment the error is created.
+
+        @param reason: Short description of what failed.
+        @param supporting_data: Optional extra key/value pairs to include
+            in the rendered description.
+        """
+        self.agent = f"Cloud-Init/{version.version_string()}"
+        self.documentation_url = "https://aka.ms/linuxprovisioningerror"
+        self.reason = reason
+
+        # Copy so that later additions to self.supporting_data (subclasses
+        # add keys after construction) never mutate the caller's dict.
+        self.supporting_data: Dict[str, Any] = dict(supporting_data or {})
+
+        self.timestamp = datetime.utcnow()
+
+        # Constructing the error must not itself fail: if the VM id lookup
+        # raises, keep the lookup error text in place of the id.
+        try:
+            self.vm_id = identity.query_vm_id()
+        except Exception as id_error:
+            self.vm_id = f"failed to read vm id: {id_error!r}"
+
+    def as_description(
+        self, *, delimiter: str = "|", quotechar: str = "'"
+    ) -> str:
+        """Render the error as a single "PROVISIONING_ERROR: ..." line.
+
+        Fields are emitted as key=value entries in a fixed order: reason,
+        agent, each supporting_data item, vm_id, timestamp and
+        documentation_url. They are joined by the csv module, so a field
+        containing the delimiter or quote character is quoted.
+
+        @param delimiter: Character placed between fields.
+        @param quotechar: Character used to quote fields when required.
+        @return: The description string.
+        """
+        data = [
+            f"reason={self.reason}",
+            f"agent={self.agent}",
+        ]
+        data += [f"{k}={v}" for k, v in self.supporting_data.items()]
+        data += [
+            f"vm_id={self.vm_id}",
+            f"timestamp={self.timestamp.isoformat()}",
+            f"documentation_url={self.documentation_url}",
+        ]
+
+        with StringIO() as buffer:
+            csv.writer(
+                buffer,
+                delimiter=delimiter,
+                quotechar=quotechar,
+                quoting=csv.QUOTE_MINIMAL,
+            ).writerow(data)
+
+            # strip trailing \r\n
+            csv_data = buffer.getvalue().rstrip()
+
+        return f"PROVISIONING_ERROR: {csv_data}"
+
+    def __eq__(self, other) -> bool:
+        """Compare by timestamp, reason and supporting data."""
+        return (
+            isinstance(other, ReportableError)
+            and self.timestamp == other.timestamp
+            and self.reason == other.reason
+            and self.supporting_data == other.supporting_data
+        )
+
+    def __hash__(self) -> int:
+        """Hash on fields used by __eq__ so equal errors hash alike.
+
+        Defining __eq__ alone sets __hash__ to None, which would make
+        the exception unhashable. supporting_data is left out because a
+        dict is not hashable.
+        """
+        return hash((self.timestamp, self.reason))
+
+    def __repr__(self) -> str:
+        """Return the rendered description."""
+        return self.as_description()
+
+
+class ReportableErrorUnhandledException(ReportableError):
+    """Reportable error wrapping an exception that was not handled."""
+
+    def __init__(self, exception: Exception) -> None:
+        """Attach the exception's repr and formatted traceback.
+
+        @param exception: The exception to report. Its repr is stored
+            under "exception" and its base64-encoded traceback under
+            "traceback_base64" in supporting_data.
+        """
+        super().__init__("unhandled exception")
+
+        trace = "".join(
+            traceback.format_exception(
+                type(exception), exception, exception.__traceback__
+            )
+        )
+        # b64encode() returns bytes; decode to str so the rendered
+        # description carries the base64 text itself rather than a
+        # b'...' bytes literal.
+        trace_base64 = base64.b64encode(trace.encode("utf-8")).decode("utf-8")
+
+        self.supporting_data["exception"] = repr(exception)
+        self.supporting_data["traceback_base64"] = trace_base64
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index c0ffd760..2413d6b0 100644
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -12,7 +12,7 @@ from contextlib import contextmanager
from datetime import datetime
from errno import ENOENT
from time import sleep, time
-from typing import Callable, List, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar, Union
from xml.etree import ElementTree
from xml.sax.saxutils import escape
@@ -20,6 +20,9 @@ from cloudinit import distros, subp, temp_utils, url_helper, util, version
from cloudinit.reporting import events
from cloudinit.settings import CFG_BUILTIN
+if TYPE_CHECKING:
+ from cloudinit.sources.azure import errors
+
LOG = logging.getLogger(__name__)
# Default Wireserver endpoint (if not found in DHCP option 245).
@@ -43,12 +46,6 @@ azure_ds_reporter = events.ReportEventStack(
reporting_enabled=True,
)
-DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE = (
- "The VM encountered an error during deployment. "
- "Please visit https://aka.ms/linuxprovisioningerror "
- "for more information on remediation."
-)
-
T = TypeVar("T")
@@ -1024,9 +1021,9 @@ def get_metadata_from_fabric(
@azure_ds_telemetry_reporter
-def report_failure_to_fabric(endpoint: str):
+def report_failure_to_fabric(endpoint: str, error: "errors.ReportableError"):
shim = WALinuxAgentShim(endpoint=endpoint)
- description = DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE
+ description = error.as_description()
try:
shim.register_with_azure_and_report_failure(description=description)
finally: