summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Patterson <cpatterson@microsoft.com> 2023-03-16 16:22:30 -0400
committer GitHub <noreply@github.com> 2023-03-16 15:22:30 -0500
commit 5eb43b9548312ecb76a6a7e4567500a836ca3514 (patch)
tree 764eac9ebd6915edef3832f35bfa340d779ff3f8
parent fc6c1d3780cd1480b7819df8fbc08eb9247e4eec (diff)
download cloud-init-git-5eb43b9548312ecb76a6a7e4567500a836ca3514.tar.gz
sources/azure: add networking check for all source PPS (#2061)
There is a networking check in _poll_imds() which will attempt DHCP again if networking is not up for source PPS. With the previous change to wait at least 20 minutes during provisioning for DHCP, this additional round is not necessary. Report failure if networking is not up for any mode of source PPS. In practice, this is very unlikely, as provisioning will typically time out within the 20 minute window the VM is attempting DHCP and the source PPS VM will be deleted. This fixes an (unobserved) issue where Savable PPS does not have networking prior to _wait_for_all_nics_ready().
Signed-off-by: Chris Patterson <cpatterson@microsoft.com>
-rw-r--r-- cloudinit/sources/DataSourceAzure.py 13
-rw-r--r-- tests/unittests/sources/test_azure.py 45
2 files changed, 46 insertions, 12 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index dfcb891f..9233384b 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -579,6 +579,12 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event(msg, logger_func=LOG.error)
raise sources.InvalidMetaDataException(msg)
+ # Networking is a hard requirement for source PPS, fail without it.
+ if not self._is_ephemeral_networking_up():
+ msg = "DHCP failed while in source PPS"
+ report_diagnostic_event(msg, logger_func=LOG.error)
+ raise sources.InvalidMetaDataException(msg)
+
if pps_type == PPSType.SAVABLE:
self._wait_for_all_nics_ready()
elif pps_type == PPSType.OS_DISK:
@@ -1096,13 +1102,6 @@ class DataSourceAzure(sources.DataSource):
dhcp_attempts = 0
if report_ready:
- # Networking must be up for netlink to detect
- # media disconnect/connect. It may be down to due
- # initial DHCP failure, if so check for it and retry,
- # ensuring we flag it as required.
- if not self._is_ephemeral_networking_up():
- self._setup_ephemeral_networking(timeout_minutes=20)
-
try:
if (
self._ephemeral_dhcp_ctx is None
diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py
index 166cbe13..04527322 100644
--- a/tests/unittests/sources/test_azure.py
+++ b/tests/unittests/sources/test_azure.py
@@ -3022,8 +3022,10 @@ class TestPreprovisioningPollIMDS(CiTestCase):
m_media_switch.return_value = None
dhcp_ctx = mock.MagicMock(lease=lease)
dhcp_ctx.obtain_lease.return_value = lease
+ dhcp_ctx.iface = lease["interface"]
dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths)
+ dsa._ephemeral_dhcp_ctx = dhcp_ctx
with mock.patch.object(
dsa, "_reported_ready_marker_file", report_file
):
@@ -3031,7 +3033,7 @@ class TestPreprovisioningPollIMDS(CiTestCase):
assert m_report_ready.mock_calls == [mock.call()]
- self.assertEqual(3, m_dhcp.call_count, "Expected 3 DHCP calls")
+ self.assertEqual(2, m_dhcp.call_count, "Expected 2 DHCP calls")
assert m_fetch_reprovisiondata.call_count == 2
@mock.patch("os.path.isfile")
@@ -3162,6 +3164,7 @@ class TestPreprovisioningPollIMDS(CiTestCase):
distro.get_tmp_exec_path = self.tmp_dir
dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
self.assertFalse(os.path.exists(report_file))
+ dsa._ephemeral_dhcp_ctx = mock.Mock(interface="eth9")
with mock.patch.object(
dsa, "_reported_ready_marker_file", report_file
):
@@ -3196,6 +3199,7 @@ class TestPreprovisioningPollIMDS(CiTestCase):
distro.get_tmp_exec_path = self.tmp_dir
dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
self.assertFalse(os.path.exists(report_file))
+ dsa._ephemeral_dhcp_ctx = mock.Mock(interface="eth9")
with mock.patch.object(
dsa, "_reported_ready_marker_file", report_file
):
@@ -3237,8 +3241,9 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
}
]
dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths)
+ dsa._ephemeral_dhcp_ctx = mock.Mock(interface="eth9")
self.assertTrue(len(dsa._poll_imds()) > 0)
- self.assertEqual(m_dhcp.call_count, 2)
+ self.assertEqual(m_dhcp.call_count, 1)
m_net.assert_any_call(
broadcast="192.168.2.255",
interface="eth9",
@@ -3247,7 +3252,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
router="192.168.2.1",
static_routes=None,
)
- self.assertEqual(m_net.call_count, 2)
+ self.assertEqual(m_net.call_count, 1)
def test__reprovision_calls__poll_imds(
self, m_fetch_reprovisiondata, m_dhcp, m_net, m_media_switch
@@ -3268,10 +3273,11 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
content = construct_ovf_env(username=username, hostname=hostname)
m_fetch_reprovisiondata.side_effect = [content]
dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths)
+ dsa._ephemeral_dhcp_ctx = mock.Mock(interface="eth9")
md, _ud, cfg, _d = dsa._reprovision()
self.assertEqual(md["local-hostname"], hostname)
self.assertEqual(cfg["system_info"]["default_user"]["name"], username)
- self.assertEqual(m_dhcp.call_count, 2)
+ self.assertEqual(m_dhcp.call_count, 1)
m_net.assert_any_call(
broadcast="192.168.2.255",
interface="eth9",
@@ -3280,7 +3286,7 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
router="192.168.2.1",
static_routes=None,
)
- self.assertEqual(m_net.call_count, 2)
+ self.assertEqual(m_net.call_count, 1)
class TestRemoveUbuntuNetworkConfigScripts(CiTestCase):
@@ -4175,6 +4181,35 @@ class TestProvisioning:
# Verify no netlink operations for recovering PPS.
assert self.mock_netlink.mock_calls == []
+ @pytest.mark.parametrize("pps_type", ["Savable", "Running", "Unknown"])
+ def test_source_pps_fails_initial_dhcp(self, pps_type):
+ self.imds_md["extended"]["compute"]["ppsType"] = pps_type
+
+ nl_sock = mock.MagicMock()
+ self.mock_netlink.create_bound_netlink_socket.return_value = nl_sock
+ self.mock_readurl.side_effect = [
+ mock.MagicMock(contents=json.dumps(self.imds_md).encode()),
+ mock.MagicMock(contents=construct_ovf_env().encode()),
+ mock.MagicMock(contents=json.dumps(self.imds_md).encode()),
+ ]
+ self.mock_azure_get_metadata_from_fabric.return_value = []
+
+ self.mock_net_dhcp_maybe_perform_dhcp_discovery.side_effect = [
+ dhcp.NoDHCPLeaseError()
+ ]
+
+ with mock.patch.object(self.azure_ds, "_report_failure") as m_report:
+ self.azure_ds._get_data()
+
+ assert m_report.mock_calls == [mock.call()]
+
+ assert self.mock_wrapping_setup_ephemeral_networking.mock_calls == [
+ mock.call(timeout_minutes=20),
+ ]
+ assert self.mock_readurl.mock_calls == []
+ assert self.mock_azure_get_metadata_from_fabric.mock_calls == []
+ assert self.mock_netlink.mock_calls == []
+
@pytest.mark.parametrize(
"subp_side_effect",
[