diff options
-rw-r--r-- | cloudinit/sources/DataSourceAzure.py | 36 | ||||
-rw-r--r-- | cloudinit/sources/azure/errors.py | 15 | ||||
-rw-r--r-- | tests/unittests/sources/azure/test_errors.py | 15 | ||||
-rw-r--r-- | tests/unittests/sources/test_azure.py | 72 |
4 files changed, 120 insertions, 18 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 77b53aaa..b8087406 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -371,7 +371,8 @@ class DataSourceAzure(sources.DataSource): ) lease = None - timeout = timeout_minutes * 60 + time() + start_time = time() + deadline = start_time + timeout_minutes * 60 with events.ReportEventStack( name="obtain-dhcp-lease", description="obtain dhcp lease", @@ -385,6 +386,12 @@ class DataSourceAzure(sources.DataSource): report_diagnostic_event( "Interface not found for DHCP", logger_func=LOG.warning ) + self._report_failure( + errors.ReportableErrorDhcpInterfaceNotFound( + duration=time() - start_time + ), + host_only=True, + ) except NoDHCPLeaseMissingDhclientError: # No dhclient, no point in retrying. report_diagnostic_event( @@ -398,6 +405,12 @@ class DataSourceAzure(sources.DataSource): "Failed to obtain DHCP lease (iface=%s)" % iface, logger_func=LOG.error, ) + self._report_failure( + errors.ReportableErrorDhcpLease( + duration=time() - start_time, interface=iface + ), + host_only=True, + ) except subp.ProcessExecutionError as error: # udevadm settle, ip link set dev eth0 up, etc. report_diagnostic_event( @@ -412,8 +425,8 @@ class DataSourceAzure(sources.DataSource): logger_func=LOG.error, ) - # Sleep before retrying, otherwise break if we're past timeout. - if lease is None and time() + retry_sleep < timeout: + # Sleep before retrying, otherwise break if past deadline. + if lease is None and time() + retry_sleep < deadline: sleep(retry_sleep) else: break @@ -1154,17 +1167,26 @@ class DataSourceAzure(sources.DataSource): return reprovision_data @azure_ds_telemetry_reporter - def _report_failure(self, error: errors.ReportableError) -> bool: - """Tells the Azure fabric that provisioning has failed. 
+ def _report_failure( + self, error: errors.ReportableError, host_only: bool = False + ) -> bool: + """Report failure to Azure host and fabric. + + For errors that may be recoverable (e.g. DHCP), host_only provides a + mechanism to report the failure that can be updated later with success. + DHCP will not be attempted if host_only=True and networking is down. - @param description: A description of the error encountered. + @param error: Error to report. + @param host_only: Only report to host (error may be recoverable). @return: The success status of sending the failure signal. """ report_diagnostic_event( f"Azure datasource failure occurred: {error.as_encoded_report()}", logger_func=LOG.error, ) - kvp.report_failure_to_host(error) + reported = kvp.report_failure_to_host(error) + if host_only: + return reported if self._is_ephemeral_networking_up(): try: diff --git a/cloudinit/sources/azure/errors.py b/cloudinit/sources/azure/errors.py index 5c4ad7db..ca902a03 100644 --- a/cloudinit/sources/azure/errors.py +++ b/cloudinit/sources/azure/errors.py @@ -84,6 +84,21 @@ class ReportableError(Exception): return self.as_encoded_report() +class ReportableErrorDhcpInterfaceNotFound(ReportableError): + def __init__(self, duration: float) -> None: + super().__init__("failure to find DHCP interface") + + self.supporting_data["duration"] = duration + + +class ReportableErrorDhcpLease(ReportableError): + def __init__(self, duration: float, interface: Optional[str]) -> None: + super().__init__("failure to obtain DHCP lease") + + self.supporting_data["duration"] = duration + self.supporting_data["interface"] = interface + + class ReportableErrorUnhandledException(ReportableError): def __init__(self, exception: Exception) -> None: super().__init__("unhandled exception") diff --git a/tests/unittests/sources/azure/test_errors.py b/tests/unittests/sources/azure/test_errors.py index d2213613..e9d3e039 100644 --- a/tests/unittests/sources/azure/test_errors.py +++ 
b/tests/unittests/sources/azure/test_errors.py @@ -119,6 +119,21 @@ def test_reportable_errors( assert error.as_encoded_report() == "|".join(data) +def test_dhcp_lease(): + error = errors.ReportableErrorDhcpLease(duration=5.6, interface="foo") + + assert error.reason == "failure to obtain DHCP lease" + assert error.supporting_data["duration"] == 5.6 + assert error.supporting_data["interface"] == "foo" + + +def test_dhcp_interface_not_found(): + error = errors.ReportableErrorDhcpInterfaceNotFound(duration=5.6) + + assert error.reason == "failure to find DHCP interface" + assert error.supporting_data["duration"] == 5.6 + + def test_unhandled_exception(): source_error = None try: diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 677c250d..59ac6459 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -3337,6 +3337,7 @@ class TestEphemeralNetworking: self, azure_ds, mock_ephemeral_dhcp_v4, + mock_kvp_report_failure_to_host, mock_sleep, ): lease = { @@ -3362,6 +3363,12 @@ class TestEphemeralNetworking: assert azure_ds._wireserver_endpoint == "168.63.129.16" assert azure_ds._ephemeral_dhcp_ctx.iface == "fakeEth0" + error_reasons = [ + c[0][0].reason + for c in mock_kvp_report_failure_to_host.call_args_list + ] + assert error_reasons == ["failure to find DHCP interface"] + def test_retry_process_error( self, azure_ds, @@ -3403,14 +3410,20 @@ class TestEphemeralNetworking: ] @pytest.mark.parametrize( - "error_class", [dhcp.NoDHCPLeaseInterfaceError, dhcp.NoDHCPLeaseError] + "error_class,error_reason", + [ + (dhcp.NoDHCPLeaseInterfaceError, "failure to find DHCP interface"), + (dhcp.NoDHCPLeaseError, "failure to obtain DHCP lease"), + ], ) def test_retry_sleeps( self, azure_ds, mock_ephemeral_dhcp_v4, + mock_kvp_report_failure_to_host, mock_sleep, error_class, + error_reason, ): lease = { "interface": "fakeEth0", @@ -3436,30 +3449,41 @@ class TestEphemeralNetworking: assert 
azure_ds._wireserver_endpoint == "168.63.129.16" assert azure_ds._ephemeral_dhcp_ctx.iface == "fakeEth0" + error_reasons = [ + c[0][0].reason + for c in mock_kvp_report_failure_to_host.call_args_list + ] + assert error_reasons == [error_reason] * 10 + @pytest.mark.parametrize( - "error_class", [dhcp.NoDHCPLeaseInterfaceError, dhcp.NoDHCPLeaseError] + "error_class,error_reason", + [ + (dhcp.NoDHCPLeaseInterfaceError, "failure to find DHCP interface"), + (dhcp.NoDHCPLeaseError, "failure to obtain DHCP lease"), + ], ) def test_retry_times_out( self, azure_ds, mock_ephemeral_dhcp_v4, + mock_kvp_report_failure_to_host, mock_sleep, mock_time, error_class, + error_reason, ): mock_time.side_effect = [ 0.0, # start - 60.1, # first - 120.1, # third - 180.1, # timeout + 60.1, # duration check for host error report + 60.11, # loop check + 120.1, # duration check for host error report + 120.11, # loop check + 180.1, # duration check for host error report + 180.11, # loop check timeout ] mock_ephemeral_dhcp_v4.return_value.obtain_lease.side_effect = [ error_class() - ] * 10 + [ - { - "interface": "fakeEth0", - } - ] + ] * 3 with pytest.raises(dhcp.NoDHCPLeaseError): azure_ds._setup_ephemeral_networking(timeout_minutes=3) @@ -3472,6 +3496,12 @@ class TestEphemeralNetworking: assert azure_ds._wireserver_endpoint == "168.63.129.16" assert azure_ds._ephemeral_dhcp_ctx is None + error_reasons = [ + c[0][0].reason + for c in mock_kvp_report_failure_to_host.call_args_list + ] + assert error_reasons == [error_reason] * 3 + class TestInstanceId: def test_metadata(self, azure_ds, mock_dmi_read_dmi_data): @@ -4138,7 +4168,7 @@ class TestProvisioning: assert self.mock_netlink.mock_calls == [] # Verify reports via KVP. 
- assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 1 + assert len(self.mock_kvp_report_failure_to_host.mock_calls) == 2 assert len(self.mock_kvp_report_success_to_host.mock_calls) == 0 @pytest.mark.parametrize( @@ -4214,6 +4244,26 @@ class TestProvisioning: assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 +class TestReportFailure: + @pytest.mark.parametrize("kvp_enabled", [False, True]) + def test_report_host_only_kvp_enabled( + self, + azure_ds, + kvp_enabled, + mock_azure_report_failure_to_fabric, + mock_kvp_report_failure_to_host, + mock_kvp_report_success_to_host, + ): + mock_kvp_report_failure_to_host.return_value = kvp_enabled + error = errors.ReportableError(reason="foo") + + assert azure_ds._report_failure(error, host_only=True) == kvp_enabled + + assert mock_kvp_report_failure_to_host.mock_calls == [mock.call(error)] + assert mock_kvp_report_success_to_host.mock_calls == [] + assert mock_azure_report_failure_to_fabric.mock_calls == [] + + class TestValidateIMDSMetadata: @pytest.mark.parametrize( "mac,expected", |