summaryrefslogtreecommitdiff
path: root/cloudinit
diff options
context:
space:
mode:
authorJohnson Shi <Johnson.Shi@microsoft.com>2020-10-16 08:54:38 -0700
committerGitHub <noreply@github.com>2020-10-16 11:54:38 -0400
commit8766784f4b1d1f9f6a9094e1268e4accb811ea7f (patch)
treed654af968ed5b185402802a3cafea7c7357e0b26 /cloudinit
parentc0e8480678e3a9173c9de1271f651fb3ba375f22 (diff)
downloadcloud-init-git-8766784f4b1d1f9f6a9094e1268e4accb811ea7f.tar.gz
DataSourceAzure: write marker file after report ready in preprovisioning (#590)
DataSourceAzure previously wrote the preprovisioning reported ready marker file before going through the report ready workflow. On certain VM instances, the marker file was successfully written but reporting ready then failed. Upon rare VM reboots by the platform, cloud-init would see that the report ready marker file already existed. The existence of this marker file tells cloud-init not to report ready again (because it mistakenly assumes that it already reported ready in preprovisioning). In this scenario, cloud-init erroneously took the reprovisioning workflow instead of reporting ready again. With this change, the marker file is written only after reporting ready has succeeded; if reporting ready fails, an error is raised and no marker file is created.
Diffstat (limited to 'cloudinit')
-rwxr-xr-xcloudinit/sources/DataSourceAzure.py23
1 files changed, 20 insertions, 3 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 8858fbd5..70e32f46 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -720,12 +720,23 @@ class DataSourceAzure(sources.DataSource):
self._ephemeral_dhcp_ctx.clean_network()
break
+ report_ready_succeeded = self._report_ready(lease=lease)
+ if not report_ready_succeeded:
+ msg = ('Failed reporting ready while in '
+ 'the preprovisioning pool.')
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ self._ephemeral_dhcp_ctx.clean_network()
+ raise sources.InvalidMetaDataException(msg)
+
path = REPORTED_READY_MARKER_FILE
LOG.info(
"Creating a marker file to report ready: %s", path)
util.write_file(path, "{pid}: {time}\n".format(
pid=os.getpid(), time=time()))
- self._report_ready(lease=lease)
+ report_diagnostic_event(
+ 'Successfully created reported ready marker file '
+ 'while in the preprovisioning pool.',
+ logger_func=LOG.debug)
report_ready = False
with events.ReportEventStack(
@@ -773,14 +784,20 @@ class DataSourceAzure(sources.DataSource):
return return_val
@azure_ds_telemetry_reporter
- def _report_ready(self, lease):
- """Tells the fabric provisioning has completed """
+ def _report_ready(self, lease: dict) -> bool:
+ """Tells the fabric provisioning has completed.
+
+ @param lease: dhcp lease to use for sending the ready signal.
+ @return: The success status of sending the ready signal.
+ """
try:
get_metadata_from_fabric(None, lease['unknown-245'])
+ return True
except Exception as e:
report_diagnostic_event(
"Error communicating with Azure fabric; You may experience "
"connectivity issues: %s" % e, logger_func=LOG.warning)
+ return False
def _should_reprovision(self, ret):
"""Whether or not we should poll IMDS for reprovisioning data.