From 6e380dd68c32555d54f6ecbd9adff0b3248b679d Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Tue, 25 Apr 2023 13:14:45 -0400 Subject: sources/azure: report failures to host via kvp (#2136) Azure can report provisioning failures via the Wireserver health endpoint. However, in the event of networking failures or Wireserver issues, this report cannot be made, so the VM fails with an OS provisioning timeout and a generic error is presented to the user. Report the failure via KVP using the "PROVISIONING_REPORT" key so that the host can relay the provisioning error report to the user when the VM fails to provision. The format used is subject to change and/or removal. Signed-off-by: Chris Patterson --- cloudinit/reporting/handlers.py | 15 ++++++++++ cloudinit/sources/DataSourceAzure.py | 16 ++++++++++- tests/unittests/reporting/test_reporting_hyperv.py | 15 ++++++++++ tests/unittests/sources/test_azure.py | 33 ++++++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) diff --git a/cloudinit/reporting/handlers.py b/cloudinit/reporting/handlers.py index 2c1f4998..ff07f940 100644 --- a/cloudinit/reporting/handlers.py +++ b/cloudinit/reporting/handlers.py @@ -347,6 +347,21 @@ class HyperVKvpReportingHandler(ReportingHandler): break return result_array + def write_key(self, key: str, value: str) -> None: + """Write KVP key-value. + + Values will be truncated as needed. + """ + if len(value) >= self.HV_KVP_AZURE_MAX_VALUE_SIZE: + value = value[0 : self.HV_KVP_AZURE_MAX_VALUE_SIZE - 1] + + data = [self._encode_kvp_item(key, value)] + + try: + self._append_kvp_item(data) + except (OSError, IOError): + LOG.warning("failed posting kvp=%s value=%s", key, value) + def _encode_event(self, event): """ encode the event into kvp data bytes. 
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index d0af2089..969cb376 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -25,7 +25,7 @@ from cloudinit.net.dhcp import ( NoDHCPLeaseMissingDhclientError, ) from cloudinit.net.ephemeral import EphemeralDHCPv4 -from cloudinit.reporting import events +from cloudinit.reporting import events, handlers, instantiated_handler_registry from cloudinit.sources.azure import errors, identity, imds from cloudinit.sources.helpers import netlink from cloudinit.sources.helpers.azure import ( @@ -1174,6 +1174,20 @@ class DataSourceAzure(sources.DataSource): ) return reprovision_data + @azure_ds_telemetry_reporter + def _report_failure_to_host(self, error: errors.ReportableError) -> bool: + """Report failure to host via well-known key.""" + value = error.as_description() + kvp_handler = instantiated_handler_registry.registered_items.get( + "telemetry" + ) + if not isinstance(kvp_handler, handlers.HyperVKvpReportingHandler): + LOG.debug("KVP handler not enabled, skipping host report.") + return False + + kvp_handler.write_key("PROVISIONING_REPORT", value) + return True + @azure_ds_telemetry_reporter def _report_failure(self, error: errors.ReportableError) -> bool: """Tells the Azure fabric that provisioning has failed. 
diff --git a/tests/unittests/reporting/test_reporting_hyperv.py b/tests/unittests/reporting/test_reporting_hyperv.py index 35ab0c58..0464f485 100644 --- a/tests/unittests/reporting/test_reporting_hyperv.py +++ b/tests/unittests/reporting/test_reporting_hyperv.py @@ -347,3 +347,18 @@ class TextKvpReporter(CiTestCase): self.assertNotEqual( kvps[0]["key"], kvps[1]["key"], "duplicate keys for KVP entries" ) + + def test_write_key(self): + reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) + reporter.write_key("test-key", "test-value") + assert list(reporter._iterate_kvps(0)) == [ + {"key": "test-key", "value": "test-value"} + ] + + def test_write_key_truncates(self): + reporter = HyperVKvpReportingHandler(kvp_file_path=self.tmp_file_path) + + value = "A" * 2000 + reporter.write_key("test-key", value) + + assert len(list(reporter._iterate_kvps(0))[0]["value"]) == 1023 diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index b3f7b1bc..6251b7ed 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -13,6 +13,7 @@ import requests from cloudinit import distros, dmi, helpers, subp, url_helper from cloudinit.net import dhcp +from cloudinit.reporting.handlers import HyperVKvpReportingHandler from cloudinit.sources import UNSET from cloudinit.sources import DataSourceAzure as dsaz from cloudinit.sources import InvalidMetaDataException @@ -301,6 +302,17 @@ def patched_reported_ready_marker_path(azure_ds, patched_markers_dir_path): yield reported_ready_marker +@pytest.fixture +def telemetry_reporter(tmp_path): + kvp_file_path = tmp_path / "kvp_pool_file" + kvp_file_path.write_bytes(b"") + reporter = HyperVKvpReportingHandler(kvp_file_path=str(kvp_file_path)) + + dsaz.instantiated_handler_registry.register_item("telemetry", reporter) + yield reporter + dsaz.instantiated_handler_registry.unregister_item("telemetry") + + def fake_http_error_for_code(status_code: int): 
response_failure = requests.Response() response_failure.status_code = status_code @@ -4378,3 +4390,24 @@ class TestValidateIMDSMetadata: } assert azure_ds.validate_imds_network_metadata(imds_md) is False + + +class TestReportFailureToHost: + def test_report(self, azure_ds, caplog, telemetry_reporter): + error = errors.ReportableError(reason="test") + assert azure_ds._report_failure_to_host(error) is True + assert ( + "KVP handler not enabled, skipping host report." not in caplog.text + ) + + report = { + "key": "PROVISIONING_REPORT", + "value": error.as_description(), + } + assert report in list(telemetry_reporter._iterate_kvps(0)) + + def test_report_skipped_without_telemtry(self, azure_ds, caplog): + error = errors.ReportableError(reason="test") + + assert azure_ds._report_failure_to_host(error) is False + assert "KVP handler not enabled, skipping host report." in caplog.text -- cgit v1.2.1