diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 85f0211bfb6..80b9226328d 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -925,70 +925,24 @@ def _wait_for_nic_detach(self, nl_sock): raise @azure_ds_telemetry_reporter - def wait_for_link_up(self, ifname): - """In cases where the link state is still showing down after a nic is - hot-attached, we can attempt to bring it up by forcing the hv_netvsc - drivers to query the link state by unbinding and then binding the - device. This function attempts infinitely until the link is up, - because we cannot proceed further until we have a stable link.""" - - if self.distro.networking.try_set_link_up(ifname): - report_diagnostic_event( - "The link %s is already up." % ifname, logger_func=LOG.info - ) - return - - LOG.debug("Attempting to bring %s up", ifname) - - attempts = 0 - LOG.info("Unbinding and binding the interface %s", ifname) - while True: - device_id = net.read_sys_net(ifname, "device/device_id") - if device_id is False or not isinstance(device_id, str): - raise RuntimeError("Unable to read device ID: %s" % device_id) - devicename = device_id.strip("{}") - util.write_file( - "/sys/bus/vmbus/drivers/hv_netvsc/unbind", devicename - ) - util.write_file( - "/sys/bus/vmbus/drivers/hv_netvsc/bind", devicename - ) - - attempts = attempts + 1 + def wait_for_link_up( + self, ifname: str, retries: int = 100, retry_sleep: float = 0.1 + ): + for i in range(retries): if self.distro.networking.try_set_link_up(ifname): - msg = "The link %s is up after %s attempts" % ( - ifname, - attempts, + report_diagnostic_event( + "The link %s is up." % ifname, logger_func=LOG.info ) - report_diagnostic_event(msg, logger_func=LOG.info) - return - - if attempts % 10 == 0: - msg = "Link is not up after %d attempts to rebind" % attempts - report_diagnostic_event(msg, logger_func=LOG.info) - LOG.info(msg) - - # It could take some time after rebind for the interface to be up. - # So poll for the status for some time before attempting to rebind - # again. - sleep_duration = 0.5 - max_status_polls = 20 - LOG.debug( - "Polling %d seconds for primary NIC link up after rebind.", - sleep_duration * max_status_polls, - ) + break - for i in range(0, max_status_polls): - if self.distro.networking.is_up(ifname): - msg = ( - "After %d attempts to rebind, link is up after " - "polling the link status %d times" % (attempts, i) - ) - report_diagnostic_event(msg, logger_func=LOG.info) - LOG.debug(msg) - return - else: - sleep(sleep_duration) + if (i + 1) < retries: + sleep(retry_sleep) + else: + report_diagnostic_event( + "The link %s is not up after %f seconds, continuing anyways." + % (ifname, retries * retry_sleep), + logger_func=LOG.info, + ) @azure_ds_telemetry_reporter def _create_report_ready_marker(self): diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 62e5134b709..ac87f5bea33 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -3126,12 +3126,10 @@ def test_wait_for_link_up_returns_if_already_up(self, m_is_link_up): dsa.wait_for_link_up("eth0") self.assertEqual(1, m_is_link_up.call_count) - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - @mock.patch(MOCKPATH + "util.write_file") - @mock.patch("cloudinit.net.read_sys_net", return_value="device-id") @mock.patch("cloudinit.distros.networking.LinuxNetworking.try_set_link_up") + @mock.patch(MOCKPATH + "sleep") def test_wait_for_link_up_checks_link_after_sleep( - self, m_try_set_link_up, m_read_sys_net, m_writefile, m_is_up + self, m_sleep, m_try_set_link_up ): """Waiting for link to be up should return immediately if the link is already up.""" @@ -3141,50 +3139,10 @@ def test_wait_for_link_up_checks_link_after_sleep( dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) m_try_set_link_up.return_value = False - callcount = 0 - - def is_up_mock(key): - nonlocal callcount - if callcount == 0: - callcount += 1 - return False - return True - - m_is_up.side_effect = is_up_mock - - with mock.patch("cloudinit.sources.DataSourceAzure.sleep"): - dsa.wait_for_link_up("eth0") - self.assertEqual(2, m_try_set_link_up.call_count) - self.assertEqual(2, m_is_up.call_count) - - @mock.patch(MOCKPATH + "util.write_file") - @mock.patch("cloudinit.net.read_sys_net", return_value="device-id") - @mock.patch("cloudinit.distros.networking.LinuxNetworking.try_set_link_up") - def test_wait_for_link_up_writes_to_device_file( - self, m_is_link_up, m_read_sys_net, m_writefile - ): - """Waiting for link to be up should return immediately if the link is - already up.""" - - distro_cls = distros.fetch("ubuntu") - distro = distro_cls("ubuntu", {}, self.paths) - dsa = dsaz.DataSourceAzure({}, distro=distro, paths=self.paths) - - callcount = 0 - - def linkup(key): - nonlocal callcount - if callcount == 0: - callcount += 1 - return False - return True - - m_is_link_up.side_effect = linkup - dsa.wait_for_link_up("eth0") - self.assertEqual(2, m_is_link_up.call_count) - self.assertEqual(1, m_read_sys_net.call_count) - self.assertEqual(2, m_writefile.call_count) + + self.assertEqual(100, m_try_set_link_up.call_count) + self.assertEqual(99 * [mock.call(0.1)], m_sleep.mock_calls) @mock.patch( "cloudinit.sources.helpers.netlink.create_bound_netlink_socket"