From a69e775b91d35654695f637d17396ccbcbae1b12 Mon Sep 17 00:00:00 2001 From: Anh Vo Date: Fri, 8 Apr 2022 10:39:58 -0700 Subject: [PATCH 1/2] only wait for primary nic to be attached during restore --- cloudinit/sources/DataSourceAzure.py | 32 +++++++++------------------ tests/unittests/sources/test_azure.py | 31 +++++++++++++++++++++----- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 80b9226328d..2aed71bbda5 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -1068,22 +1068,16 @@ def network_metadata_exc_cb(msg, exc): return is_primary, expected_nic_count @azure_ds_telemetry_reporter - def _wait_for_hot_attached_nics(self, nl_sock): - """Wait until all the expected nics for the vm are hot-attached. - The expected nic count is obtained by requesting the network metadata - from IMDS. - """ - LOG.info("Waiting for nics to be hot-attached") + def _wait_for_hot_attached_primary_nic(self, nl_sock): + """Wait until the primary nic for the vm is hot-attached.""" + LOG.info("Waiting for primary nic to be hot-attached") try: - # Wait for nics to be attached one at a time, until we know for - # sure that all nics have been attached. nics_found = [] primary_nic_found = False - expected_nic_count = -1 # Wait for netlink nic attach events. After the first nic is # attached, we are already in the customer vm deployment path and - # so eerything from then on should happen fast and avoid + # so everything from then on should happen fast and avoid # unnecessary delays wherever possible. while True: ifname = None @@ -1114,17 +1108,13 @@ def _wait_for_hot_attached_nics(self, nl_sock): # won't be in primary_nic_found = false state for long. if not primary_nic_found: LOG.info("Checking if %s is the primary nic", ifname) - ( - primary_nic_found, - expected_nic_count, - ) = self._check_if_nic_is_primary(ifname) + primary_nic_found, _ = self._check_if_nic_is_primary( + ifname + ) - # Exit criteria: check if we've discovered all nics - if ( - expected_nic_count != -1 - and len(nics_found) >= expected_nic_count - ): - LOG.info("Found all the nics for this VM.") + # Exit criteria: check if we've discovered primary nic + if primary_nic_found: + LOG.info("Found primary nic for this VM.") break except AssertionError as error: @@ -1144,7 +1134,7 @@ def _wait_for_all_nics_ready(self): self._report_ready_for_pps() self._teardown_ephemeral_networking() self._wait_for_nic_detach(nl_sock) - self._wait_for_hot_attached_nics(nl_sock) + self._wait_for_hot_attached_primary_nic(nl_sock) except netlink.NetlinkCreateSocketError as e: report_diagnostic_event(str(e), logger_func=LOG.warning) raise diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index ac87f5bea33..c9a82fca0a6 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -2987,12 +2987,14 @@ def setUp(self): @mock.patch(MOCKPATH + "util.write_file", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") - @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_hot_attached_nics") + @mock.patch( + MOCKPATH + "DataSourceAzure._wait_for_hot_attached_primary_nic" + ) @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_reports_ready_and_waits_for_detach( self, m_detach, - m_wait_for_hot_attached_nics, + m_wait_for_hot_attached_primary_nic, m_report_ready, m_writefile, ): @@ -3000,7 +3002,7 @@ def test_detect_nic_attach_reports_ready_and_waits_for_detach( dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) dsa._wait_for_all_nics_ready() self.assertEqual(1, m_report_ready.call_count) - self.assertEqual(1, m_wait_for_hot_attached_nics.call_count) + self.assertEqual(1, m_wait_for_hot_attached_primary_nic.call_count) self.assertEqual(1, m_detach.call_count) self.assertEqual(1, m_writefile.call_count) m_writefile.assert_called_with( @@ -3026,7 +3028,8 @@ def test_wait_for_nic_attach_multinic_attach( m_report_ready, m_writefile, ): - """Wait for nic attach if we do not have a fallback interface""" + """Wait for nic attach if we do not have a fallback interface. + Skip waiting for additional nics after we have found primary""" dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) lease = { "interface": "eth9", @@ -3057,10 +3060,28 @@ def test_wait_for_nic_attach_multinic_attach( dsa._wait_for_all_nics_ready() self.assertEqual(1, m_detach.call_count) - self.assertEqual(2, m_attach.call_count) + # only wait for primary nic + self.assertEqual(1, m_attach.call_count) # DHCP and network metadata calls will only happen on the primary NIC. self.assertEqual(1, m_dhcpv4.call_count) self.assertEqual(1, m_imds.call_count) + # no call to bring link up on secondary nic + self.assertEqual(1, m_link_up.call_count) + + # reset mock to test again with primary nic being eth1 + m_detach.reset_mock() + m_attach.reset_mock() + m_dhcpv4.reset_mock() + m_link_up.reset_mock() + m_attach.side_effect = ["eth0", "eth1"] + m_imds.reset_mock() + m_imds.side_effect = [None, md] + dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) + dsa._wait_for_all_nics_ready() + self.assertEqual(1, m_detach.call_count) + self.assertEqual(2, m_attach.call_count) + self.assertEqual(2, m_dhcpv4.call_count) + self.assertEqual(2, m_imds.call_count) self.assertEqual(2, m_link_up.call_count) @mock.patch("cloudinit.url_helper.time.sleep", autospec=True) From ed832b5bd06c7ff0ddea31acc68e741659d3fbe7 Mon Sep 17 00:00:00 2001 From: Anh Vo Date: Mon, 18 Apr 2022 09:02:34 -0700 Subject: [PATCH 2/2] fixed unit test --- tests/unittests/sources/test_azure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index c9a82fca0a6..21e2201260e 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -3075,7 +3075,7 @@ def test_wait_for_nic_attach_multinic_attach( m_link_up.reset_mock() m_attach.side_effect = ["eth0", "eth1"] m_imds.reset_mock() - m_imds.side_effect = [None, md] + m_imds.side_effect = [{}, md] dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) dsa._wait_for_all_nics_ready() self.assertEqual(1, m_detach.call_count)