summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author aswinrajamannar <39812128+aswinrajamannar@users.noreply.github.com> 2021-08-20 15:53:18 -0700
committer GitHub <noreply@github.com> 2021-08-20 16:53:18 -0600
commit 3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1 (patch)
tree ca76749caeeb00faccfd1c7668868ceb2ed74ced
parent 7d3f5d750f6111c2716143364ea33486df67c927 (diff)
download cloud-init-git-3ec8ddde0d1d2fd8597f7d2915baa3e328552ab1.tar.gz
Azure: During primary nic detection, check interface status continuously before rebinding again (#990)
Add a 10-second polling loop in wait_for_link_up after performing an unbind and re-bind of the primary NIC in the hv_netvsc driver. Also reduce cloud-init logging levels to debug for these operations.
-rwxr-xr-x cloudinit/sources/DataSourceAzure.py 38
-rw-r--r-- tests/unittests/test_datasource/test_azure.py 20
2 files changed, 35 insertions, 23 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index ba23139b..fddfe363 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -892,12 +892,12 @@ class DataSourceAzure(sources.DataSource):
logger_func=LOG.info)
return
- LOG.info("Attempting to bring %s up", ifname)
+ LOG.debug("Attempting to bring %s up", ifname)
attempts = 0
+ LOG.info("Unbinding and binding the interface %s", ifname)
while True:
- LOG.info("Unbinding and binding the interface %s", ifname)
devicename = net.read_sys_net(ifname,
'device/device_id').strip('{}')
util.write_file('/sys/bus/vmbus/drivers/hv_netvsc/unbind',
@@ -912,26 +912,28 @@ class DataSourceAzure(sources.DataSource):
report_diagnostic_event(msg, logger_func=LOG.info)
return
- sleep_duration = 1
- msg = ("Link is not up after %d attempts with %d seconds sleep "
- "between attempts." % (attempts, sleep_duration))
-
if attempts % 10 == 0:
+ msg = ("Link is not up after %d attempts to rebind" % attempts)
report_diagnostic_event(msg, logger_func=LOG.info)
- else:
LOG.info(msg)
- sleep(sleep_duration)
-
- # Since we just did a unbind and bind, check again after sleep
- # but before doing unbind and bind again to avoid races where the
- # link might take a slight delay after bind to be up.
- if self.distro.networking.is_up(ifname):
- msg = ("Link is up after checking after sleeping for %d secs"
- " after %d attempts" %
- (sleep_duration, attempts))
- report_diagnostic_event(msg, logger_func=LOG.info)
- return
+ # It could take some time after rebind for the interface to be up.
+ # So poll for the status for some time before attempting to rebind
+ # again.
+ sleep_duration = 0.5
+ max_status_polls = 20
+ LOG.debug("Polling %d seconds for primary NIC link up after "
+ "rebind.", sleep_duration * max_status_polls)
+
+ for i in range(0, max_status_polls):
+ if self.distro.networking.is_up(ifname):
+ msg = ("After %d attempts to rebind, link is up after "
+ "polling the link status %d times" % (attempts, i))
+ report_diagnostic_event(msg, logger_func=LOG.info)
+ LOG.debug(msg)
+ return
+ else:
+ sleep(sleep_duration)
@azure_ds_telemetry_reporter
def _create_report_ready_marker(self):
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index 03609c3d..851cf82e 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -2912,19 +2912,29 @@ class TestPreprovisioningHotAttachNics(CiTestCase):
@mock.patch('cloudinit.net.read_sys_net')
@mock.patch('cloudinit.distros.networking.LinuxNetworking.try_set_link_up')
def test_wait_for_link_up_checks_link_after_sleep(
- self, m_is_link_up, m_read_sys_net, m_writefile, m_is_up):
+ self, m_try_set_link_up, m_read_sys_net, m_writefile, m_is_up):
"""Waiting for link to be up should return immediately if the link is
already up."""
distro_cls = distros.fetch('ubuntu')
distro = distro_cls('ubuntu', {}, self.paths)
dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths)
- m_is_link_up.return_value = False
- m_is_up.return_value = True
+ m_try_set_link_up.return_value = False
+
+ callcount = 0
+
+ def is_up_mock(key):
+ nonlocal callcount
+ if callcount == 0:
+ callcount += 1
+ return False
+ return True
+
+ m_is_up.side_effect = is_up_mock
dsa.wait_for_link_up("eth0")
- self.assertEqual(2, m_is_link_up.call_count)
- self.assertEqual(1, m_is_up.call_count)
+ self.assertEqual(2, m_try_set_link_up.call_count)
+ self.assertEqual(2, m_is_up.call_count)
@mock.patch(MOCKPATH + 'util.write_file')
@mock.patch('cloudinit.net.read_sys_net')