summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cloudinit/sources/DataSourceAzure.py36
-rw-r--r--cloudinit/sources/azure/errors.py15
-rw-r--r--tests/unittests/sources/azure/test_errors.py15
-rw-r--r--tests/unittests/sources/test_azure.py72
4 files changed, 120 insertions, 18 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 77b53aaa..b8087406 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -371,7 +371,8 @@ class DataSourceAzure(sources.DataSource):
)
lease = None
- timeout = timeout_minutes * 60 + time()
+ start_time = time()
+ deadline = start_time + timeout_minutes * 60
with events.ReportEventStack(
name="obtain-dhcp-lease",
description="obtain dhcp lease",
@@ -385,6 +386,12 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event(
"Interface not found for DHCP", logger_func=LOG.warning
)
+ self._report_failure(
+ errors.ReportableErrorDhcpInterfaceNotFound(
+ duration=time() - start_time
+ ),
+ host_only=True,
+ )
except NoDHCPLeaseMissingDhclientError:
# No dhclient, no point in retrying.
report_diagnostic_event(
@@ -398,6 +405,12 @@ class DataSourceAzure(sources.DataSource):
"Failed to obtain DHCP lease (iface=%s)" % iface,
logger_func=LOG.error,
)
+ self._report_failure(
+ errors.ReportableErrorDhcpLease(
+ duration=time() - start_time, interface=iface
+ ),
+ host_only=True,
+ )
except subp.ProcessExecutionError as error:
# udevadm settle, ip link set dev eth0 up, etc.
report_diagnostic_event(
@@ -412,8 +425,8 @@ class DataSourceAzure(sources.DataSource):
logger_func=LOG.error,
)
- # Sleep before retrying, otherwise break if we're past timeout.
- if lease is None and time() + retry_sleep < timeout:
+ # Sleep before retrying, otherwise break if past deadline.
+ if lease is None and time() + retry_sleep < deadline:
sleep(retry_sleep)
else:
break
@@ -1154,17 +1167,26 @@ class DataSourceAzure(sources.DataSource):
return reprovision_data
@azure_ds_telemetry_reporter
- def _report_failure(self, error: errors.ReportableError) -> bool:
- """Tells the Azure fabric that provisioning has failed.
+ def _report_failure(
+ self, error: errors.ReportableError, host_only: bool = False
+ ) -> bool:
+ """Report failure to Azure host and fabric.
+
+ For errors that may be recoverable (e.g. DHCP), host_only provides a
+ mechanism to report the failure that can be updated later with success.
+ DHCP will not be attempted if host_only=True and networking is down.
- @param description: A description of the error encountered.
+ @param error: Error to report.
+ @param host_only: Only report to host (error may be recoverable).
@return: The success status of sending the failure signal.
"""
report_diagnostic_event(
f"Azure datasource failure occurred: {error.as_encoded_report()}",
logger_func=LOG.error,
)
- kvp.report_failure_to_host(error)
+ reported = kvp.report_failure_to_host(error)
+ if host_only:
+ return reported
if self._is_ephemeral_networking_up():
try:
diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py
index 5c4ad7db..ca902a03 100644
--- a/cloudinit/sources/azure/errors.py
+++ b/cloudinit/sources/azure/errors.py
@@ -84,6 +84,21 @@ class ReportableError(Exception):
return self.as_encoded_report()
+class ReportableErrorDhcpInterfaceNotFound(ReportableError):
+ def __init__(self, duration: float) -> None:
+ super().__init__("failure to find DHCP interface")
+
+ self.supporting_data["duration"] = duration
+
+
+class ReportableErrorDhcpLease(ReportableError):
+ def __init__(self, duration: float, interface: Optional[str]) -> None:
+ super().__init__("failure to obtain DHCP lease")
+
+ self.supporting_data["duration"] = duration
+ self.supporting_data["interface"] = interface
+
+
class ReportableErrorUnhandledException(ReportableError):
def __init__(self, exception: Exception) -> None:
super().__init__("unhandled exception")
diff --git a/tests/unittests/sources/azure/test_errors.py b/tests/unittests/sources/azure/test_errors.py
index d2213613..e9d3e039 100644
--- a/tests/unittests/sources/azure/test_errors.py
+++ b/tests/unittests/sources/azure/test_errors.py
@@ -119,6 +119,21 @@ def test_reportable_errors(
assert error.as_encoded_report() == "|".join(data)
+def test_dhcp_lease():
+ error = errors.ReportableErrorDhcpLease(duration=5.6, interface="foo")
+
+ assert error.reason == "failure to obtain DHCP lease"
+ assert error.supporting_data["duration"] == 5.6
+ assert error.supporting_data["interface"] == "foo"
+
+
+def test_dhcp_interface_not_found():
+ error = errors.ReportableErrorDhcpInterfaceNotFound(duration=5.6)
+
+ assert error.reason == "failure to find DHCP interface"
+ assert error.supporting_data["duration"] == 5.6
+
+
def test_unhandled_exception():
source_error = None
try:
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index 677c250d..59ac6459 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -3337,6 +3337,7 @@ class TestEphemeralNetworking:
self,
azure_ds,
mock_ephemeral_dhcp_v4,
+ mock_kvp_report_failure_to_host,
mock_sleep,
):
lease = {
@@ -3362,6 +3363,12 @@ class TestEphemeralNetworking:
assert azure_ds._wireserver_endpoint == "168.63.129.16"
assert azure_ds._ephemeral_dhcp_ctx.iface == "fakeEth0"
+ error_reasons = [
+ c[0][0].reason
+ for c in mock_kvp_report_failure_to_host.call_args_list
+ ]
+ assert error_reasons == ["failure to find DHCP interface"]
+
def test_retry_process_error(
self,
azure_ds,
@@ -3403,14 +3410,20 @@ class TestEphemeralNetworking:
]
@pytest.mark.parametrize(
- "error_class", [dhcp.NoDHCPLeaseInterfaceError, dhcp.NoDHCPLeaseError]
+ "error_class,error_reason",
+ [
+ (dhcp.NoDHCPLeaseInterfaceError, "failure to find DHCP interface"),
+ (dhcp.NoDHCPLeaseError, "failure to obtain DHCP lease"),
+ ],
)
def test_retry_sleeps(
self,
azure_ds,
mock_ephemeral_dhcp_v4,
+ mock_kvp_report_failure_to_host,
mock_sleep,
error_class,
+ error_reason,
):
lease = {
"interface": "fakeEth0",
@@ -3436,30 +3449,41 @@ class TestEphemeralNetworking:
assert azure_ds._wireserver_endpoint == "168.63.129.16"
assert azure_ds._ephemeral_dhcp_ctx.iface == "fakeEth0"
+ error_reasons = [
+ c[0][0].reason
+ for c in mock_kvp_report_failure_to_host.call_args_list
+ ]
+ assert error_reasons == [error_reason] * 10
+
@pytest.mark.parametrize(
- "error_class", [dhcp.NoDHCPLeaseInterfaceError, dhcp.NoDHCPLeaseError]
+ "error_class,error_reason",
+ [
+ (dhcp.NoDHCPLeaseInterfaceError, "failure to find DHCP interface"),
+ (dhcp.NoDHCPLeaseError, "failure to obtain DHCP lease"),
+ ],
)
def test_retry_times_out(
self,
azure_ds,
mock_ephemeral_dhcp_v4,
+ mock_kvp_report_failure_to_host,
mock_sleep,
mock_time,
error_class,
+ error_reason,
):
mock_time.side_effect = [
0.0, # start
- 60.1, # first
- 120.1, # third
- 180.1, # timeout
+ 60.1, # duration check for host error report
+ 60.11, # loop check
+ 120.1, # duration check for host error report
+ 120.11, # loop check
+ 180.1, # duration check for host error report
+ 180.11, # loop check timeout
]
mock_ephemeral_dhcp_v4.return_value.obtain_lease.side_effect = [
error_class()
- ] * 10 + [
- {
- "interface": "fakeEth0",
- }
- ]
+ ] * 3
with pytest.raises(dhcp.NoDHCPLeaseError):
azure_ds._setup_ephemeral_networking(timeout_minutes=3)
@@ -3472,6 +3496,12 @@ class TestEphemeralNetworking:
assert azure_ds._wireserver_endpoint == "168.63.129.16"
assert azure_ds._ephemeral_dhcp_ctx is None
+ error_reasons = [
+ c[0][0].reason
+ for c in mock_kvp_report_failure_to_host.call_args_list
+ ]
+ assert error_reasons == [error_reason] * 3
+
class TestInstanceId:
def test_metadata(self, azure_ds, mock_dmi_read_dmi_data):
@@ -4138,7 +4168,7 @@ class TestProvisioning:
assert self.mock_netlink.mock_calls == []
# Verify reports via KVP.
- assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1
+ assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 2
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 0
@pytest.mark.parametrize(
@@ -4214,6 +4244,26 @@ class TestProvisioning:
assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1
+class TestReportFailure:
+ @pytest.mark.parametrize("kvp_enabled", [False, True])
+ def report_host_only_kvp_enabled(
+ self,
+ azure_ds,
+ kvp_enabled,
+ mock_azure_report_failure_to_fabric,
+ mock_kvp_report_failure_to_host,
+ mock_kvp_report_success_to_host,
+ ):
+ mock_kvp_report_failure_to_host.return_value = kvp_enabled
+ error = errors.ReportableError(reason="foo")
+
+ assert azure_ds._report_failure(error, host_only=True) == kvp_enabled
+
+ assert mock_kvp_report_failure_to_host.mock_calls == [mock.call(error)]
+ assert mock_kvp_report_success_to_host.mock_calls == []
+ assert mock_azure_report_failure_to_fabric.mock_calls == []
+
+
class TestValidateIMDSMetadata:
@pytest.mark.parametrize(
"mac,expected",