From c830a7da859bdc1c1285b5c5dd8375f156b41984 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Tue, 10 Feb 2026 01:29:09 +0000 Subject: [PATCH 1/2] feat(azure): introduce experimental skip_ready_report for Azure There are some scenarios where we wish to skip cloud-init's health ready reporting for Azure when it completes the init-local phase. Prefix it with experimental_ so it's clear it's not supported. We can rename and document it in the future if we want to keep it. There are also some edge cases that aren't fully supported as-is, because this process skips fetching SSH keys from Wireserver in the cases that require it (e.g. CRLF or x509 certs). To test, configure this flag in VM/image: ```yaml datasource: Azure: experimental_skip_ready_report: true ``` Verify behavior in logs: ```bash $ grep ready_report /var/log/cloud-init.log 2026-02-27 17:57:53,058 - DataSourceAzure.py[DEBUG]: Skipping final health report as experimental_skip_ready_report is enabled. ``` Signed-off-by: Chris Patterson --- cloudinit/sources/DataSourceAzure.py | 34 +++-- tests/unittests/sources/test_azure.py | 207 ++++++++++++++++++++++++++ 2 files changed, 228 insertions(+), 13 deletions(-) diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 261993fe1ce..299f2c781c5 100644 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -292,6 +292,7 @@ def get_resource_disk_on_freebsd(port_id) -> Optional[str]: "disk_aliases": {"ephemeral0": RESOURCE_DISK_PATH}, "apply_network_config": True, # Use IMDS published network configuration "apply_network_config_for_secondary_ips": True, # Configure secondary ips + "experimental_skip_ready_report": False, # Skip final ready report } BUILTIN_CLOUD_EPHEMERAL_DISK_CONFIG = { @@ -838,21 +839,28 @@ def crawl_metadata(self): crawled_data["metadata"]["instance-id"] = self._iid() if self._negotiated is False and self._is_ephemeral_networking_up(): - # Report ready and fetch public-keys from 
Wireserver, if required. - pubkey_info = self._determine_wireserver_pubkey_info( - cfg=cfg, imds_md=imds_md - ) - try: - ssh_keys = self._report_ready(pubkey_info=pubkey_info) - except Exception: - # Failed to report ready, but continue with best effort. - pass + if self.ds_cfg.get("experimental_skip_ready_report", False): + LOG.debug( + "Skipping final health report as " + "experimental_skip_ready_report is enabled." + ) else: - LOG.debug("negotiating returned %s", ssh_keys) - if ssh_keys: - crawled_data["metadata"]["public-keys"] = ssh_keys + # Report ready and fetch public-keys from Wireserver, + # if required. + pubkey_info = self._determine_wireserver_pubkey_info( + cfg=cfg, imds_md=imds_md + ) + try: + ssh_keys = self._report_ready(pubkey_info=pubkey_info) + except Exception: + # Failed to report ready, but continue with best effort. + pass + else: + LOG.debug("negotiating returned %s", ssh_keys) + if ssh_keys: + crawled_data["metadata"]["public-keys"] = ssh_keys - self._cleanup_markers() + self._cleanup_markers() return crawled_data diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 67373e1ef1e..7dcb088df72 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -5212,6 +5212,213 @@ def test_os_disk_pps(self, mock_sleep, subp_side_effect): assert len(self.mock_kvp_report_via_kvp.mock_calls) == 1 assert len(self.mock_kvp_report_success_to_host.mock_calls) == 1 + @pytest.mark.parametrize("pps_type", ["None", "Running", "Savable"]) + def test_skip_ready_report(self, pps_type): + """Verify ready report is skipped when experimental_skip_ready_report=True.""" + self.azure_ds.ds_cfg["experimental_skip_ready_report"] = True + + is_pps = pps_type in ("Running", "Savable") + + imds_md_source = copy.deepcopy(self.imds_md) + imds_md_source["extended"]["compute"]["ppsType"] = pps_type + + nl_sock = mock.MagicMock() + self.mock_netlink.create_bound_netlink_socket.return_value = nl_sock 
+ if pps_type == "Savable": + self.mock_netlink.wait_for_nic_detach_event.return_value = "eth9" + self.mock_netlink.wait_for_nic_attach_event.return_value = ( + "ethAttached1" + ) + + if is_pps: + self.mock_readurl.side_effect = [ + mock.MagicMock(contents=json.dumps(imds_md_source).encode()), + mock.MagicMock( + contents=construct_ovf_env( + provision_guest_proxy_agent=False + ).encode() + ), + mock.MagicMock(contents=json.dumps(self.imds_md).encode()), + ] + else: + ovf = construct_ovf_env(provision_guest_proxy_agent=False) + md, ud, cfg = dsaz.read_azure_ovf(ovf) + self.mock_util_mount_cb.return_value = (md, ud, cfg, {}) + self.mock_readurl.side_effect = [ + mock.MagicMock(contents=json.dumps(self.imds_md).encode()), + ] + + self.mock_azure_get_metadata_from_fabric.return_value = [] + + self.azure_ds._check_and_get_data() + + assert self.mock_subp_subp.mock_calls == [] + + # Verify IMDS calls. + if is_pps: + assert self.mock_readurl.mock_calls == [ + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/reprovisiondata?" + "api-version=2019-06-01", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + log_req_resp=False, + infinite=True, + timeout=30, + ), + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + exception_cb=mock.ANY, + headers_cb=imds.headers_cb, + infinite=True, + log_req_resp=True, + timeout=30, + ), + ] + else: + assert self.mock_readurl.mock_calls == [ + mock.call( + "http://169.254.169.254/metadata/instance?" + "api-version=2021-08-01&extended=true", + timeout=30, + headers_cb=imds.headers_cb, + exception_cb=mock.ANY, + infinite=True, + log_req_resp=True, + ), + ] + + # Verify DHCP setup. 
+ if pps_type == "Running": + assert ( + self.mock_wrapping_setup_ephemeral_networking.mock_calls + == [ + mock.call(timeout_minutes=20), + mock.call(timeout_minutes=5), + ] + ) + assert ( + self.mock_net_dhcp_maybe_perform_dhcp_discovery.mock_calls + == [ + mock.call(self.azure_ds.distro, None, dsaz.dhcp_log_cb), + mock.call(self.azure_ds.distro, None, dsaz.dhcp_log_cb), + ] + ) + elif pps_type == "Savable": + assert ( + self.mock_wrapping_setup_ephemeral_networking.mock_calls + == [ + mock.call(timeout_minutes=20), + mock.call( + iface="ethAttached1", + timeout_minutes=20, + report_failure_if_not_primary=False, + ), + ] + ) + assert ( + self.mock_net_dhcp_maybe_perform_dhcp_discovery.mock_calls + == [ + mock.call(self.azure_ds.distro, None, dsaz.dhcp_log_cb), + mock.call( + self.azure_ds.distro, + "ethAttached1", + dsaz.dhcp_log_cb, + ), + ] + ) + else: + assert ( + self.mock_wrapping_setup_ephemeral_networking.mock_calls + == [mock.call(timeout_minutes=20)] + ) + assert ( + self.mock_net_dhcp_maybe_perform_dhcp_discovery.mock_calls + == [ + mock.call(self.azure_ds.distro, None, dsaz.dhcp_log_cb), + ] + ) + + assert self.azure_ds._wireserver_endpoint == "10.11.12.13" + assert self.azure_ds._is_ephemeral_networking_up() is False + + # Verify DMI usage. + assert self.mock_dmi_read_dmi_data.mock_calls == [ + mock.call("chassis-asset-tag"), + mock.call("system-uuid"), + ] + assert ( + self.azure_ds.metadata["instance-id"] + == "50109936-ef07-47fe-ac82-890c853f60d5" + ) + + # Verify IMDS metadata. + assert self.azure_ds.metadata["imds"] == self.imds_md + + # PPS types still report ready once (source), no-PPS skips entirely. + if is_pps: + assert self.mock_azure_get_metadata_from_fabric.mock_calls == [ + mock.call( + endpoint="10.11.12.13", + distro=self.azure_ds.distro, + iso_dev="/dev/sr0", + pubkey_info=None, + ), + ] + else: + assert self.mock_azure_get_metadata_from_fabric.mock_calls == [] + + # Verify netlink operations. 
+ if pps_type == "Running": + assert self.mock_netlink.mock_calls == [ + mock.call.create_bound_netlink_socket(), + mock.call.wait_for_media_disconnect_connect( + mock.ANY, "ethBoot0" + ), + mock.call.create_bound_netlink_socket().close(), + ] + elif pps_type == "Savable": + assert self.mock_netlink.mock_calls == [ + mock.call.create_bound_netlink_socket(), + mock.call.wait_for_nic_detach_event(nl_sock), + mock.call.wait_for_nic_attach_event(nl_sock, ["ethAttached1"]), + mock.call.create_bound_netlink_socket().close(), + ] + else: + assert self.mock_netlink.mock_calls == [] + + # Verify reported_ready marker cleaned up. + if is_pps: + assert self.wrapped_util_write_file.mock_calls[0] == mock.call( + self.patched_reported_ready_marker_path.as_posix(), + mock.ANY, + ) + else: + assert self.wrapped_util_write_file.mock_calls == [] + assert self.patched_reported_ready_marker_path.exists() is False + + # Verify KVP reports. + assert not self.mock_kvp_report_via_kvp.mock_calls + assert not self.mock_azure_report_failure_to_fabric.mock_calls + expected_kvp_count = 1 if is_pps else 0 + assert ( + len(self.mock_kvp_report_success_to_host.mock_calls) + == expected_kvp_count + ) + assert ( + len(self.mock_report_dmesg_to_kvp.mock_calls) == expected_kvp_count + ) + def test_imds_failure_results_in_provisioning_failure(self): self.mock_readurl.side_effect = url_helper.UrlError( requests.ConnectionError( From 513dacc4d556da3a1180a0bb93f87e03681e2ed5 Mon Sep 17 00:00:00 2001 From: Chris Patterson Date: Fri, 27 Feb 2026 18:25:31 +0000 Subject: [PATCH 2/2] fix lint --- tests/unittests/sources/test_azure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 7dcb088df72..06bbff5d871 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -5214,7 +5214,7 @@ def test_os_disk_pps(self, mock_sleep, subp_side_effect): 
@pytest.mark.parametrize("pps_type", ["None", "Running", "Savable"]) def test_skip_ready_report(self, pps_type): - """Verify ready report is skipped when experimental_skip_ready_report=True.""" + """Verify ready report is skipped when configured to.""" self.azure_ds.ds_cfg["experimental_skip_ready_report"] = True is_pps = pps_type in ("Running", "Savable")