From 967104088db0e9724096e3776c9c31ccbb3f97cb Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Fri, 28 Apr 2023 14:29:14 -0700 Subject: sources/azure: report success to host and introduce kvp module (#2141) Add success reporting to the host via KVP. - Move _report_failure_to_host() into kvp module. - Tweak error description to use result=error instead of PROVISIONING_ERROR: ... - Use result=success for the successful ("ready") reports. - report_x_via_kvp => report_x_to_host for consistency with fabric. ReportableError.as_description() => as_encoded_report() Signed-off-by: Chris Patterson --- cloudinit/sources/DataSourceAzure.py | 23 ++------ cloudinit/sources/azure/errors.py | 38 +++++++----- cloudinit/sources/azure/kvp.py | 57 ++++++++++++++++++ cloudinit/sources/helpers/azure.py | 2 +- tests/unittests/sources/azure/test_errors.py | 7 ++- tests/unittests/sources/azure/test_kvp.py | 88 ++++++++++++++++++++++++++++ tests/unittests/sources/test_azure.py | 80 ++++++++++++++++--------- tests/unittests/sources/test_azure_helper.py | 2 +- 8 files changed, 232 insertions(+), 65 deletions(-) create mode 100644 cloudinit/sources/azure/kvp.py create mode 100644 tests/unittests/sources/azure/test_kvp.py diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 969cb376..923644e1 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -25,8 +25,8 @@ from cloudinit.net.dhcp import ( NoDHCPLeaseMissingDhclientError, ) from cloudinit.net.ephemeral import EphemeralDHCPv4 -from cloudinit.reporting import events, handlers, instantiated_handler_registry -from cloudinit.sources.azure import errors, identity, imds +from cloudinit.reporting import events +from cloudinit.sources.azure import errors, identity, imds, kvp from cloudinit.sources.helpers import netlink from cloudinit.sources.helpers.azure import ( DEFAULT_WIRESERVER_ENDPOINT, @@ -1174,20 +1174,6 @@ class DataSourceAzure(sources.DataSource): ) return reprovision_data - @azure_ds_telemetry_reporter - def _report_failure_to_host(self, error: errors.ReportableError) -> bool: - """Report failure to host via well-known key.""" - value = error.as_description() - kvp_handler = instantiated_handler_registry.registered_items.get( - "telemetry" - ) - if not isinstance(kvp_handler, handlers.HyperVKvpReportingHandler): - LOG.debug("KVP handler not enabled, skipping host report.") - return False - - kvp_handler.write_key("PROVISIONING_REPORT", value) - return True - @azure_ds_telemetry_reporter def _report_failure(self, error: errors.ReportableError) -> bool: """Tells the Azure fabric that provisioning has failed. @@ -1196,9 +1182,10 @@ class DataSourceAzure(sources.DataSource): @return: The success status of sending the failure signal. """ report_diagnostic_event( - f"Azure datasource failure occurred: {error.as_description()}", + f"Azure datasource failure occurred: {error.as_encoded_report()}", logger_func=LOG.error, ) + kvp.report_failure_to_host(error) if self._is_ephemeral_networking_up(): try: @@ -1253,6 +1240,8 @@ class DataSourceAzure(sources.DataSource): :returns: List of SSH keys, if requested. """ + kvp.report_success_to_host() + try: data = get_metadata_from_fabric( endpoint=self._wireserver_endpoint, diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py index 1a452635..5c4ad7db 100644 --- a/cloudinit/sources/azure/errors.py +++ b/cloudinit/sources/azure/errors.py @@ -8,7 +8,7 @@ import logging import traceback from datetime import datetime from io import StringIO -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from cloudinit import version from cloudinit.sources.azure import identity @@ -16,6 +16,22 @@ from cloudinit.sources.azure import identity LOG = logging.getLogger(__name__) +def encode_report( + data: List[str], delimiter: str = "|", quotechar: str = "'" +) -> str: + """Encode report data with csv.""" + with StringIO() as io: + csv.writer( + io, + delimiter=delimiter, + quotechar=quotechar, + quoting=csv.QUOTE_MINIMAL, + ).writerow(data) + + # strip trailing \r\n + return io.getvalue().rstrip() + + class ReportableError(Exception): def __init__( self, @@ -39,10 +55,11 @@ class ReportableError(Exception): except Exception as id_error: self.vm_id = f"failed to read vm id: {id_error!r}" - def as_description( - self, *, delimiter: str = "|", quotechar: str = "'" + def as_encoded_report( + self, ) -> str: data = [ + "result=error", f"reason={self.reason}", f"agent={self.agent}", ] @@ -53,18 +70,7 @@ class ReportableError(Exception): f"documentation_url={self.documentation_url}", ] - with StringIO() as io: - csv.writer( - io, - delimiter=delimiter, - quotechar=quotechar, - quoting=csv.QUOTE_MINIMAL, - ).writerow(data) - - # strip trailing \r\n - csv_data = io.getvalue().rstrip() - - return f"PROVISIONING_ERROR: {csv_data}" + return encode_report(data) def __eq__(self, other) -> bool: return ( @@ -75,7 +81,7 @@ class ReportableError(Exception): ) def __repr__(self) -> str: - return self.as_description() + return self.as_encoded_report() class ReportableErrorUnhandledException(ReportableError): diff --git a/cloudinit/sources/azure/kvp.py b/cloudinit/sources/azure/kvp.py new file mode 100644 index 00000000..735c4616 --- /dev/null +++ b/cloudinit/sources/azure/kvp.py @@ -0,0 +1,57 @@ +# Copyright (C) 2022 Microsoft Corporation. +# +# This file is part of cloud-init. See LICENSE file for license information. + +import logging +from datetime import datetime +from typing import Optional + +from cloudinit import version +from cloudinit.reporting import handlers, instantiated_handler_registry +from cloudinit.sources.azure import errors, identity + +LOG = logging.getLogger(__name__) + + +def get_kvp_handler() -> Optional[handlers.HyperVKvpReportingHandler]: + """Get instantiated KVP telemetry handler.""" + kvp_handler = instantiated_handler_registry.registered_items.get( + "telemetry" + ) + if not isinstance(kvp_handler, handlers.HyperVKvpReportingHandler): + return None + + return kvp_handler + + +def report_via_kvp(report: str) -> bool: + """Report to host via PROVISIONING_REPORT KVP key.""" + kvp_handler = get_kvp_handler() + if kvp_handler is None: + LOG.debug("KVP handler not enabled, skipping host report.") + return False + + kvp_handler.write_key("PROVISIONING_REPORT", report) + return True + + +def report_failure_to_host(error: errors.ReportableError) -> bool: + return report_via_kvp(error.as_encoded_report()) + + +def report_success_to_host() -> bool: + try: + vm_id = identity.query_vm_id() + except Exception as id_error: + vm_id = f"failed to read vm id: {id_error!r}" + + report = errors.encode_report( + [ + "result=success", + f"agent=Cloud-Init/{version.version_string()}", + f"timestamp={datetime.utcnow().isoformat()}", + f"vm_id={vm_id}", + ] + ) + + return report_via_kvp(report) diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index 2413d6b0..6e5c1f43 100644 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -1023,7 +1023,7 @@ def get_metadata_from_fabric( @azure_ds_telemetry_reporter def report_failure_to_fabric(endpoint: str, error: "errors.ReportableError"): shim = WALinuxAgentShim(endpoint=endpoint) - description = error.as_description() + description = error.as_encoded_report() try: shim.register_with_azure_and_report_failure(description=description) finally: diff --git a/tests/unittests/sources/azure/test_errors.py b/tests/unittests/sources/azure/test_errors.py index 9211d472..d2213613 100644 --- a/tests/unittests/sources/azure/test_errors.py +++ b/tests/unittests/sources/azure/test_errors.py @@ -105,7 +105,8 @@ def test_reportable_errors( ) data = [ - "PROVISIONING_ERROR: " + quote_csv_value(f"reason={reason}"), + "result=error", + quote_csv_value(f"reason={reason}"), f"agent=Cloud-Init/{version.version_string()}", ] data += [quote_csv_value(f"{k}={v}") for k, v in supporting_data.items()] @@ -115,7 +116,7 @@ def test_reportable_errors( "documentation_url=https://aka.ms/linuxprovisioningerror", ] - assert error.as_description() == "|".join(data) + assert error.as_encoded_report() == "|".join(data) def test_unhandled_exception(): @@ -136,4 +137,4 @@ def test_unhandled_exception(): assert trace.endswith("ValueError: my value error\n") quoted_value = quote_csv_value(f"exception={source_error!r}") - assert f"|{quoted_value}|" in error.as_description() + assert f"|{quoted_value}|" in error.as_encoded_report() diff --git a/tests/unittests/sources/azure/test_kvp.py b/tests/unittests/sources/azure/test_kvp.py new file mode 100644 index 00000000..f0f4a999 --- /dev/null +++ b/tests/unittests/sources/azure/test_kvp.py @@ -0,0 +1,88 @@ +# This file is part of cloud-init. See LICENSE file for license information. + +from datetime import datetime +from unittest import mock + +import pytest + +from cloudinit import version +from cloudinit.sources.azure import errors, kvp + + +@pytest.fixture() +def fake_utcnow(): + timestamp = datetime.utcnow() + with mock.patch.object(kvp, "datetime", autospec=True) as m: + m.utcnow.return_value = timestamp + yield timestamp + + +@pytest.fixture() +def fake_vm_id(): + vm_id = "fake-vm-id" + with mock.patch.object(kvp.identity, "query_vm_id", autospec=True) as m: + m.return_value = vm_id + yield vm_id + + +@pytest.fixture +def telemetry_reporter(tmp_path): + kvp_file_path = tmp_path / "kvp_pool_file" + kvp_file_path.write_bytes(b"") + reporter = kvp.handlers.HyperVKvpReportingHandler( + kvp_file_path=str(kvp_file_path) + ) + + kvp.instantiated_handler_registry.register_item("telemetry", reporter) + yield reporter + kvp.instantiated_handler_registry.unregister_item("telemetry") + + +class TestReportFailureToHost: + def test_report_failure_to_host(self, caplog, telemetry_reporter): + error = errors.ReportableError(reason="test") + assert kvp.report_failure_to_host(error) is True + assert ( + "KVP handler not enabled, skipping host report." not in caplog.text + ) + + report = { + "key": "PROVISIONING_REPORT", + "value": error.as_encoded_report(), + } + assert report in list(telemetry_reporter._iterate_kvps(0)) + + def test_report_skipped_without_telemetry(self, caplog): + error = errors.ReportableError(reason="test") + + assert kvp.report_failure_to_host(error) is False + assert "KVP handler not enabled, skipping host report." in caplog.text + + +class TestReportSuccessToHost: + def test_report_success_to_host( + self, caplog, fake_utcnow, fake_vm_id, telemetry_reporter + ): + assert kvp.report_success_to_host() is True + assert ( + "KVP handler not enabled, skipping host report." not in caplog.text + ) + + report_value = errors.encode_report( + [ + "result=success", + f"agent=Cloud-Init/{version.version_string()}", + f"timestamp={fake_utcnow.isoformat()}", + f"vm_id={fake_vm_id}", + ] + ) + + report = { + "key": "PROVISIONING_REPORT", + "value": report_value, + } + assert report in list(telemetry_reporter._iterate_kvps(0)) + + def test_report_skipped_without_telemetry(self, caplog): + assert kvp.report_success_to_host() is False + assert "KVP handler not enabled, skipping host report." in caplog.text diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 6251b7ed..47e65658 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -135,6 +135,26 @@ def mock_ephemeral_dhcp_v4(): yield m +@pytest.fixture +def mock_kvp_report_failure_to_host(): + with mock.patch( + MOCKPATH + "kvp.report_failure_to_host", + return_value=True, + autospec=True, + ) as m: + yield m + + +@pytest.fixture +def mock_kvp_report_success_to_host(): + with mock.patch( + MOCKPATH + "kvp.report_success_to_host", + return_value=True, + autospec=True, + ) as m: + yield m + + @pytest.fixture def mock_net_dhcp_maybe_perform_dhcp_discovery(): with mock.patch( @@ -308,9 +328,9 @@ def telemetry_reporter(tmp_path): kvp_file_path.write_bytes(b"") reporter = HyperVKvpReportingHandler(kvp_file_path=str(kvp_file_path)) - dsaz.instantiated_handler_registry.register_item("telemetry", reporter) + dsaz.kvp.instantiated_handler_registry.register_item("telemetry", reporter) yield reporter - dsaz.instantiated_handler_registry.unregister_item("telemetry") + dsaz.kvp.instantiated_handler_registry.unregister_item("telemetry") def fake_http_error_for_code(status_code: int): @@ -3482,6 +3502,8 @@ class TestProvisioning: mock_dmi_read_dmi_data, mock_get_interfaces, mock_get_interface_mac, + mock_kvp_report_failure_to_host, + mock_kvp_report_success_to_host, mock_netlink, mock_readurl, mock_subp_subp, @@ -3510,6 +3532,8 @@ class TestProvisioning: self.mock_dmi_read_dmi_data = mock_dmi_read_dmi_data self.mock_get_interfaces = mock_get_interfaces self.mock_get_interface_mac = mock_get_interface_mac + self.mock_kvp_report_failure_to_host = mock_kvp_report_failure_to_host + self.mock_kvp_report_success_to_host = mock_kvp_report_success_to_host self.mock_netlink = mock_netlink self.mock_readurl = mock_readurl self.mock_subp_subp = mock_subp_subp @@ -3612,6 +3636,10 @@ class TestProvisioning: assert self.wrapped_util_write_file.mock_calls == [] assert self.patched_reported_ready_marker_path.exists() is False + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 + def test_running_pps(self): self.imds_md["extended"]["compute"]["ppsType"] = "Running" @@ -3716,6 +3744,10 @@ class TestProvisioning: ) assert self.patched_reported_ready_marker_path.exists() is False + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + def test_savable_pps(self): self.imds_md["extended"]["compute"]["ppsType"] = "Savable" @@ -3835,6 +3867,10 @@ class TestProvisioning: ) assert self.patched_reported_ready_marker_path.exists() is False + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2 + @pytest.mark.parametrize( "fabric_side_effect", [ @@ -4072,6 +4108,10 @@ class TestProvisioning: ] assert self.patched_reported_ready_marker_path.exists() is False + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 + @pytest.mark.parametrize("pps_type", ["Savable", "Running", "Unknown"]) def test_source_pps_fails_initial_dhcp(self, pps_type): self.imds_md["extended"]["compute"]["ppsType"] = pps_type @@ -4089,18 +4129,21 @@ class TestProvisioning: dhcp.NoDHCPLeaseError() ] - with mock.patch.object(self.azure_ds, "_report_failure") as m_report: - self.azure_ds._get_data() - - assert m_report.mock_calls == [mock.call(mock.ANY)] + assert self.azure_ds._get_data() is False assert self.mock_wrapping_setup_ephemeral_networking.mock_calls == [ mock.call(timeout_minutes=20), + # Second round for _report_failure(). + mock.call(timeout_minutes=20), ] assert self.mock_readurl.mock_calls == [] assert self.mock_azure_get_metadata_from_fabric.mock_calls == [] assert self.mock_netlink.mock_calls == [] + # Verify reports via KVP. + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 0 + @pytest.mark.parametrize( "subp_side_effect", [ @@ -4169,6 +4212,10 @@ class TestProvisioning: assert self.patched_reported_ready_marker_path.exists() is False assert self.wrapped_util_write_file.mock_calls == [] + # Verify reports via KVP. Ignore failure reported after sleep(). + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1 + assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 + class TestValidateIMDSMetadata: @pytest.mark.parametrize( @@ -4390,24 +4437,3 @@ class TestValidateIMDSMetadata: } assert azure_ds.validate_imds_network_metadata(imds_md) is False - - -class TestReportFailureToHost: - def test_report(self, azure_ds, caplog, telemetry_reporter): - error = errors.ReportableError(reason="test") - assert azure_ds._report_failure_to_host(error) is True - assert ( - "KVP handler not enabled, skipping host report." not in caplog.text - ) - - report = { - "key": "PROVISIONING_REPORT", - "value": error.as_description(), - } - assert report in list(telemetry_reporter._iterate_kvps(0)) - - def test_report_skipped_without_telemtry(self, azure_ds, caplog): - error = errors.ReportableError(reason="test") - - assert azure_ds._report_failure_to_host(error) is False - assert "KVP handler not enabled, skipping host report." in caplog.text diff --git a/tests/unittests/sources/test_azure_helper.py b/tests/unittests/sources/test_azure_helper.py index a08011a6..971da9af 100644 --- a/tests/unittests/sources/test_azure_helper.py +++ b/tests/unittests/sources/test_azure_helper.py @@ -1382,7 +1382,7 @@ class TestGetMetadataGoalStateXMLAndReportFailureToFabric(CiTestCase): # default err message description should be shown to the user # if an empty description is passed in self.m_shim.return_value.register_with_azure_and_report_failure.assert_called_once_with( # noqa: E501 - description=error.as_description(), + description=error.as_encoded_report(), ) def test_instantiates_shim_with_kwargs(self): -- cgit v1.2.1