summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Patterson <cpatterson@microsoft.com>2023-04-28 14:29:14 -0700
committerGitHub <noreply@github.com>2023-04-28 16:29:14 -0500
commit967104088db0e9724096e3776c9c31ccbb3f97cb (patch)
treeb3bfb5c07bd22f1beae0b0f1c2b329fbfee61764
parent5abf5f5f2cf93c57ac74220251d2a2acce5f7099 (diff)
downloadcloud-init-git-967104088db0e9724096e3776c9c31ccbb3f97cb.tar.gz
sources/azure: report success to host and introduce kvp module (#2141)
Add success reporting to the host via KVP. - Move _report_failure_to_host() into kvp module. - Tweak error description to use result=error instead of PROVISIONING_ERROR: ... - Use result=success for the successful ("ready") reports. - report_x_via_kvp => report_x_to_host for consistency with fabric. ReportableError.as_description() => as_encoded_report() Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
-rw-r--r--cloudinit/sources/DataSourceAzure.py23
-rw-r--r--cloudinit/sources/azure/errors.py38
-rw-r--r--cloudinit/sources/azure/kvp.py57
-rw-r--r--cloudinit/sources/helpers/azure.py2
-rw-r--r--tests/unittests/sources/azure/test_errors.py7
-rw-r--r--tests/unittests/sources/azure/test_kvp.py88
-rw-r--r--tests/unittests/sources/test_azure.py80
-rw-r--r--tests/unittests/sources/test_azure_helper.py2
8 files changed, 232 insertions, 65 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 969cb376..923644e1 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -25,8 +25,8 @@ from cloudinit.net.dhcp import (
NoDHCPLeaseMissingDhclientError,
)
from cloudinit.net.ephemeral import EphemeralDHCPv4
-from cloudinit.reporting import events, handlers, instantiated_handler_registry
-from cloudinit.sources.azure import errors, identity, imds
+from cloudinit.reporting import events
+from cloudinit.sources.azure import errors, identity, imds, kvp
from cloudinit.sources.helpers import netlink
from cloudinit.sources.helpers.azure import (
DEFAULT_WIRESERVER_ENDPOINT,
@@ -1175,20 +1175,6 @@ class DataSourceAzure(sources.DataSource):
return reprovision_data
@azure_ds_telemetry_reporter
- def _report_failure_to_host(self, error: errors.ReportableError) -> bool:
- """Report failure to host via well-known key."""
- value = error.as_description()
- kvp_handler = instantiated_handler_registry.registered_items.get(
- "telemetry"
- )
- if not isinstance(kvp_handler, handlers.HyperVKvpReportingHandler):
- LOG.debug("KVP handler not enabled, skipping host report.")
- return False
-
- kvp_handler.write_key("PROVISIONING_REPORT", value)
- return True
-
- @azure_ds_telemetry_reporter
def _report_failure(self, error: errors.ReportableError) -> bool:
"""Tells the Azure fabric that provisioning has failed.
@@ -1196,9 +1182,10 @@ class DataSourceAzure(sources.DataSource):
@return: The success status of sending the failure signal.
"""
report_diagnostic_event(
- f"Azure datasource failure occurred: {error.as_description()}",
+ f"Azure datasource failure occurred: {error.as_encoded_report()}",
logger_func=LOG.error,
)
+ kvp.report_failure_to_host(error)
if self._is_ephemeral_networking_up():
try:
@@ -1253,6 +1240,8 @@ class DataSourceAzure(sources.DataSource):
:returns: List of SSH keys, if requested.
"""
+ kvp.report_success_to_host()
+
try:
data = get_metadata_from_fabric(
endpoint=self._wireserver_endpoint,
diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py
index 1a452635..5c4ad7db 100644
--- a/cloudinit/sources/azure/errors.py
+++ b/cloudinit/sources/azure/errors.py
@@ -8,7 +8,7 @@ import logging
import traceback
from datetime import datetime
from io import StringIO
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
from cloudinit import version
from cloudinit.sources.azure import identity
@@ -16,6 +16,22 @@ from cloudinit.sources.azure import identity
LOG = logging.getLogger(__name__)
+def encode_report(
+ data: List[str], delimiter: str = "|", quotechar: str = "'"
+) -> str:
+ """Encode report data with csv."""
+ with StringIO() as io:
+ csv.writer(
+ io,
+ delimiter=delimiter,
+ quotechar=quotechar,
+ quoting=csv.QUOTE_MINIMAL,
+ ).writerow(data)
+
+ # strip trailing \r\n
+ return io.getvalue().rstrip()
+
+
class ReportableError(Exception):
def __init__(
self,
@@ -39,10 +55,11 @@ class ReportableError(Exception):
except Exception as id_error:
self.vm_id = f"failed to read vm id: {id_error!r}"
- def as_description(
- self, *, delimiter: str = "|", quotechar: str = "'"
+ def as_encoded_report(
+ self,
) -> str:
data = [
+ "result=error",
f"reason={self.reason}",
f"agent={self.agent}",
]
@@ -53,18 +70,7 @@ class ReportableError(Exception):
f"documentation_url={self.documentation_url}",
]
- with StringIO() as io:
- csv.writer(
- io,
- delimiter=delimiter,
- quotechar=quotechar,
- quoting=csv.QUOTE_MINIMAL,
- ).writerow(data)
-
- # strip trailing \r\n
- csv_data = io.getvalue().rstrip()
-
- return f"PROVISIONING_ERROR: {csv_data}"
+ return encode_report(data)
def __eq__(self, other) -> bool:
return (
@@ -75,7 +81,7 @@ class ReportableError(Exception):
)
def __repr__(self) -> str:
- return self.as_description()
+ return self.as_encoded_report()
class ReportableErrorUnhandledException(ReportableError):
diff --git a/cloudinit/sources/azure/kvp.py b/cloudinit/sources/azure/kvp.py
new file mode 100644
index 00000000..735c4616
--- /dev/null
+++ b/cloudinit/sources/azure/kvp.py
@@ -0,0 +1,57 @@
+# Copyright (C) 2022 Microsoft Corporation.
+#
+# This file is part of cloud-init. See LICENSE file for license information.
+
+import logging
+from datetime import datetime
+from typing import Optional
+
+from cloudinit import version
+from cloudinit.reporting import handlers, instantiated_handler_registry
+from cloudinit.sources.azure import errors, identity
+
+LOG = logging.getLogger(__name__)
+
+
+def get_kvp_handler() -> Optional[handlers.HyperVKvpReportingHandler]:
+ """Get instantiated KVP telemetry handler."""
+ kvp_handler = instantiated_handler_registry.registered_items.get(
+ "telemetry"
+ )
+ if not isinstance(kvp_handler, handlers.HyperVKvpReportingHandler):
+ return None
+
+ return kvp_handler
+
+
+def report_via_kvp(report: str) -> bool:
+ """Report to host via PROVISIONING_REPORT KVP key."""
+ kvp_handler = get_kvp_handler()
+ if kvp_handler is None:
+ LOG.debug("KVP handler not enabled, skipping host report.")
+ return False
+
+ kvp_handler.write_key("PROVISIONING_REPORT", report)
+ return True
+
+
+def report_failure_to_host(error: errors.ReportableError) -> bool:
+ return report_via_kvp(error.as_encoded_report())
+
+
+def report_success_to_host() -> bool:
+ try:
+ vm_id = identity.query_vm_id()
+ except Exception as id_error:
+ vm_id = f"failed to read vm id: {id_error!r}"
+
+ report = errors.encode_report(
+ [
+ "result=success",
+ f"agent=Cloud-Init/{version.version_string()}",
+ f"timestamp={datetime.utcnow().isoformat()}",
+ f"vm_id={vm_id}",
+ ]
+ )
+
+ return report_via_kvp(report)
diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py
index 2413d6b0..6e5c1f43 100644
--- a/cloudinit/sources/helpers/azure.py
+++ b/cloudinit/sources/helpers/azure.py
@@ -1023,7 +1023,7 @@ def get_metadata_from_fabric(
@azure_ds_telemetry_reporter
def report_failure_to_fabric(endpoint: str, error: "errors.ReportableError"):
shim = WALinuxAgentShim(endpoint=endpoint)
- description = error.as_description()
+ description = error.as_encoded_report()
try:
shim.register_with_azure_and_report_failure(description=description)
finally:
diff --git a/tests/unittests/sources/azure/test_errors.py b/tests/unittests/sources/azure/test_errors.py
index 9211d472..d2213613 100644
--- a/tests/unittests/sources/azure/test_errors.py
+++ b/tests/unittests/sources/azure/test_errors.py
@@ -105,7 +105,8 @@ def test_reportable_errors(
)
data = [
- "PROVISIONING_ERROR: " + quote_csv_value(f"reason={reason}"),
+ "result=error",
+ quote_csv_value(f"reason={reason}"),
f"agent=Cloud-Init/{version.version_string()}",
]
data += [quote_csv_value(f"{k}={v}") for k, v in supporting_data.items()]
@@ -115,7 +116,7 @@ def test_reportable_errors(
"documentation_url=https://aka.ms/linuxprovisioningerror",
]
- assert error.as_description() == "|".join(data)
+ assert error.as_encoded_report() == "|".join(data)
def test_unhandled_exception():
@@ -136,4 +137,4 @@ def test_unhandled_exception():
assert trace.endswith("ValueError: my value error\n")
quoted_value = quote_csv_value(f"exception={source_error!r}")
- assert f"|{quoted_value}|" in error.as_description()
+ assert f"|{quoted_value}|" in error.as_encoded_report()
diff --git a/tests/unittests/sources/azure/test_kvp.py b/tests/unittests/sources/azure/test_kvp.py
new file mode 100644
index 00000000..f0f4a999
--- /dev/null
+++ b/tests/unittests/sources/azure/test_kvp.py
@@ -0,0 +1,88 @@
+# This file is part of cloud-init. See LICENSE file for license information.
+
+from datetime import datetime
+from unittest import mock
+
+import pytest
+
+from cloudinit import version
+from cloudinit.sources.azure import errors, kvp
+
+
+@pytest.fixture()
+def fake_utcnow():
+ timestamp = datetime.utcnow()
+ with mock.patch.object(kvp, "datetime", autospec=True) as m:
+ m.utcnow.return_value = timestamp
+ yield timestamp
+
+
+@pytest.fixture()
+def fake_vm_id():
+ vm_id = "fake-vm-id"
+ with mock.patch.object(kvp.identity, "query_vm_id", autospec=True) as m:
+ m.return_value = vm_id
+ yield vm_id
+
+
+@pytest.fixture
+def telemetry_reporter(tmp_path):
+ kvp_file_path = tmp_path / "kvp_pool_file"
+ kvp_file_path.write_bytes(b"")
+ reporter = kvp.handlers.HyperVKvpReportingHandler(
+ kvp_file_path=str(kvp_file_path)
+ )
+
+ kvp.instantiated_handler_registry.register_item("telemetry", reporter)
+ yield reporter
+ kvp.instantiated_handler_registry.unregister_item("telemetry")
+
+
+class TestReportFailureToHost:
+ def test_report_failure_to_host(self, caplog, telemetry_reporter):
+ error = errors.ReportableError(reason="test")
+ assert kvp.report_failure_to_host(error) is True
+ assert (
+ "KVP handler not enabled, skipping host report." not in caplog.text
+ )
+
+ report = {
+ "key": "PROVISIONING_REPORT",
+ "value": error.as_encoded_report(),
+ }
+ assert report in list(telemetry_reporter._iterate_kvps(0))
+
+ def test_report_skipped_without_telemetry(self, caplog):
+ error = errors.ReportableError(reason="test")
+
+ assert kvp.report_failure_to_host(error) is False
+ assert "KVP handler not enabled, skipping host report." in caplog.text
+
+
+class TestReportSuccessToHost:
+ def test_report_success_to_host(
+ self, caplog, fake_utcnow, fake_vm_id, telemetry_reporter
+ ):
+ assert kvp.report_success_to_host() is True
+ assert (
+ "KVP handler not enabled, skipping host report." not in caplog.text
+ )
+
+ report_value = errors.encode_report(
+ [
+ "result=success",
+ f"agent=Cloud-Init/{version.version_string()}",
+ f"timestamp={fake_utcnow.isoformat()}",
+ f"vm_id={fake_vm_id}",
+ ]
+ )
+
+ report = {
+ "key": "PROVISIONING_REPORT",
+ "value": report_value,
+ }
+ assert report in list(telemetry_reporter._iterate_kvps(0))
+
+ def test_report_skipped_without_telemetry(self, caplog):
+ assert kvp.report_success_to_host() is False
+ assert "KVP handler not enabled, skipping host report." in caplog.text
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index 6251b7ed..47e65658 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -136,6 +136,26 @@ def mock_ephemeral_dhcp_v4():
@pytest.fixture
+def mock_kvp_report_failure_to_host():
+ with mock.patch(
+ MOCKPATH + "kvp.report_failure_to_host",
+ return_value=True,
+ autospec=True,
+ ) as m:
+ yield m
+
+
+@pytest.fixture
+def mock_kvp_report_success_to_host():
+ with mock.patch(
+ MOCKPATH + "kvp.report_success_to_host",
+ return_value=True,
+ autospec=True,
+ ) as m:
+ yield m
+
+
+@pytest.fixture
def mock_net_dhcp_maybe_perform_dhcp_discovery():
with mock.patch(
"cloudinit.net.ephemeral.maybe_perform_dhcp_discovery",
@@ -308,9 +328,9 @@ def telemetry_reporter(tmp_path):
kvp_file_path.write_bytes(b"")
reporter = HyperVKvpReportingHandler(kvp_file_path=str(kvp_file_path))
- dsaz.instantiated_handler_registry.register_item("telemetry", reporter)
+ dsaz.kvp.instantiated_handler_registry.register_item("telemetry", reporter)
yield reporter
- dsaz.instantiated_handler_registry.unregister_item("telemetry")
+ dsaz.kvp.instantiated_handler_registry.unregister_item("telemetry")
def fake_http_error_for_code(status_code: int):
@@ -3482,6 +3502,8 @@ class TestProvisioning:
mock_dmi_read_dmi_data,
mock_get_interfaces,
mock_get_interface_mac,
+ mock_kvp_report_failure_to_host,
+ mock_kvp_report_success_to_host,
mock_netlink,
mock_readurl,
mock_subp_subp,
@@ -3510,6 +3532,8 @@ class TestProvisioning:
self.mock_dmi_read_dmi_data = mock_dmi_read_dmi_data
self.mock_get_interfaces = mock_get_interfaces
self.mock_get_interface_mac = mock_get_interface_mac
+ self.mock_kvp_report_failure_to_host = mock_kvp_report_failure_to_host
+ self.mock_kvp_report_success_to_host = mock_kvp_report_success_to_host
self.mock_netlink = mock_netlink
self.mock_readurl = mock_readurl
self.mock_subp_subp = mock_subp_subp
@@ -3612,6 +3636,10 @@ class TestProvisioning:
assert self.wrapped_util_write_file.mock_calls == []
assert self.patched_reported_ready_marker_path.exists() is False
+ # Verify reports via KVP.
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1
+
def test_running_pps(self):
self.imds_md["extended"]["compute"]["ppsType"] = "Running"
@@ -3716,6 +3744,10 @@ class TestProvisioning:
)
assert self.patched_reported_ready_marker_path.exists() is False
+ # Verify reports via KVP.
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2
+
def test_savable_pps(self):
self.imds_md["extended"]["compute"]["ppsType"] = "Savable"
@@ -3835,6 +3867,10 @@ class TestProvisioning:
)
assert self.patched_reported_ready_marker_path.exists() is False
+ # Verify reports via KVP.
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 2
+
@pytest.mark.parametrize(
"fabric_side_effect",
[
@@ -4072,6 +4108,10 @@ class TestProvisioning:
]
assert self.patched_reported_ready_marker_path.exists() is False
+ # Verify reports via KVP.
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 0
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1
+
@pytest.mark.parametrize("pps_type", ["Savable", "Running", "Unknown"])
def test_source_pps_fails_initial_dhcp(self, pps_type):
self.imds_md["extended"]["compute"]["ppsType"] = pps_type
@@ -4089,18 +4129,21 @@ class TestProvisioning:
dhcp.NoDHCPLeaseError()
]
- with mock.patch.object(self.azure_ds, "_report_failure") as m_report:
- self.azure_ds._get_data()
-
- assert m_report.mock_calls == [mock.call(mock.ANY)]
+ assert self.azure_ds._get_data() is False
assert self.mock_wrapping_setup_ephemeral_networking.mock_calls == [
mock.call(timeout_minutes=20),
+ # Second round for _report_failure().
+ mock.call(timeout_minutes=20),
]
assert self.mock_readurl.mock_calls == []
assert self.mock_azure_get_metadata_from_fabric.mock_calls == []
assert self.mock_netlink.mock_calls == []
+ # Verify reports via KVP.
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 0
+
@pytest.mark.parametrize(
"subp_side_effect",
[
@@ -4169,6 +4212,10 @@ class TestProvisioning:
assert self.patched_reported_ready_marker_path.exists() is False
assert self.wrapped_util_write_file.mock_calls == []
+ # Verify reports via KVP. Ignore failure reported after sleep().
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1
+ assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1
+
class TestValidateIMDSMetadata:
@pytest.mark.parametrize(
@@ -4390,24 +4437,3 @@ class TestValidateIMDSMetadata:
}
assert azure_ds.validate_imds_network_metadata(imds_md) is False
-
-
-class TestReportFailureToHost:
- def test_report(self, azure_ds, caplog, telemetry_reporter):
- error = errors.ReportableError(reason="test")
- assert azure_ds._report_failure_to_host(error) is True
- assert (
- "KVP handler not enabled, skipping host report." not in caplog.text
- )
-
- report = {
- "key": "PROVISIONING_REPORT",
- "value": error.as_description(),
- }
- assert report in list(telemetry_reporter._iterate_kvps(0))
-
- def test_report_skipped_without_telemtry(self, azure_ds, caplog):
- error = errors.ReportableError(reason="test")
-
- assert azure_ds._report_failure_to_host(error) is False
- assert "KVP handler not enabled, skipping host report." in caplog.text
diff --git a/tests/unittests/sources/test_azure_helper.py b/tests/unittests/sources/test_azure_helper.py
index a08011a6..971da9af 100644
--- a/tests/unittests/sources/test_azure_helper.py
+++ b/tests/unittests/sources/test_azure_helper.py
@@ -1382,7 +1382,7 @@ class TestGetMetadataGoalStateXMLAndReportFailureToFabric(CiTestCase):
# default err message description should be shown to the user
# if an empty description is passed in
self.m_shim.return_value.register_with_azure_and_report_failure.assert_called_once_with( # noqa: E501
- description=error.as_description(),
+ description=error.as_encoded_report(),
)
def test_instantiates_shim_with_kwargs(self):