diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 222f99f2013..44efd358666 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -6,6 +6,7 @@ import base64 import crypt +import datetime import os import os.path import re @@ -24,15 +25,16 @@ from cloudinit import net, sources, ssh_util, subp, util from cloudinit.event import EventScope, EventType from cloudinit.net import device_driver -from cloudinit.net.dhcp import NoDHCPLeaseError +from cloudinit.net.dhcp import EphemeralDHCPv4, NoDHCPLeaseError from cloudinit.reporting import events from cloudinit.sources.helpers import netlink from cloudinit.sources.helpers.azure import ( DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE, - EphemeralDHCPv4WithReporting, + DEFAULT_WIRESERVER_ENDPOINT, azure_ds_reporter, azure_ds_telemetry_reporter, build_minimal_ovf, + dhcp_log_cb, get_boot_telemetry, get_metadata_from_fabric, get_system_info, @@ -303,6 +305,7 @@ def __init__(self, sys_cfg, distro, paths): self.dhclient_lease_file = self.ds_cfg.get("dhclient_lease_file") self._network_config = None self._ephemeral_dhcp_ctx = None + self._wireserver_endpoint = DEFAULT_WIRESERVER_ENDPOINT self.iso_dev = None def _unpickle(self, ci_pkl_version: int) -> None: @@ -311,6 +314,7 @@ def _unpickle(self, ci_pkl_version: int) -> None: self._ephemeral_dhcp_ctx = None if not hasattr(self, "iso_dev"): self.iso_dev = None + self._wireserver_endpoint = DEFAULT_WIRESERVER_ENDPOINT def __str__(self): root = sources.DataSource.__str__(self) @@ -326,6 +330,76 @@ def _get_subplatform(self): subplatform_type = "seed-dir" return "%s (%s)" % (subplatform_type, self.seed) + @azure_ds_telemetry_reporter + def _setup_ephemeral_networking( + self, *, iface: Optional[str] = None, timeout_minutes: int = 5 + ) -> None: + """Setup ephemeral networking. + + Keep retrying DHCP up to specified number of minutes. This does + not kill dhclient, so the timeout in practice may be up to + timeout_minutes + the system-configured timeout for dhclient. + + :param timeout_minutes: Number of minutes to keep retrying for. + + :raises NoDHCPLeaseError: If unable to obtain DHCP lease. + """ + if self._ephemeral_dhcp_ctx is not None: + raise RuntimeError( + "Bringing up networking when already configured." + ) + + LOG.debug("Requested ephemeral networking (iface=%s)", iface) + + start = datetime.datetime.utcnow() + timeout = start + datetime.timedelta(minutes=timeout_minutes) + + self._ephemeral_dhcp_ctx = EphemeralDHCPv4( + iface=iface, dhcp_log_func=dhcp_log_cb + ) + + lease = None + with events.ReportEventStack( + name="obtain-dhcp-lease", + description="obtain dhcp lease", + parent=azure_ds_reporter, + ): + while datetime.datetime.utcnow() < timeout: + try: + lease = self._ephemeral_dhcp_ctx.obtain_lease() + break + except NoDHCPLeaseError: + continue + + if lease is None: + msg = "Failed to obtain DHCP lease (iface=%s)" % iface + report_diagnostic_event(msg, logger_func=LOG.error) + self._ephemeral_dhcp_ctx = None + raise NoDHCPLeaseError() + else: + # Ensure iface is set. + self._ephemeral_dhcp_ctx.iface = lease["interface"] + + # Update wireserver IP from DHCP options. + if "unknown-245" in lease: + self._wireserver_endpoint = lease["unknown-245"] + + @azure_ds_telemetry_reporter + def _teardown_ephemeral_networking(self) -> None: + """Teardown ephemeral networking.""" + if self._ephemeral_dhcp_ctx is None: + return + + self._ephemeral_dhcp_ctx.clean_network() + self._ephemeral_dhcp_ctx = None + + def _is_ephemeral_networking_up(self) -> bool: + """Check if networking is configured.""" + return not ( + self._ephemeral_dhcp_ctx is None + or self._ephemeral_dhcp_ctx.lease is None + ) + @azure_ds_telemetry_reporter def crawl_metadata(self): """Walk all instance metadata sources returning a dict on success. @@ -350,6 +424,8 @@ def crawl_metadata(self): userdata_raw = "" cfg = {} files = {} + + iso_dev = None if os.path.isfile(REPROVISION_MARKER_FILE): metadata_source = "IMDS" report_diagnostic_event( @@ -370,7 +446,7 @@ def crawl_metadata(self): src, load_azure_ds_dir ) # save the device for ejection later - self.iso_dev = src + iso_dev = src else: md, userdata_raw, cfg, files = load_azure_ds_dir(src) ovf_is_accessible = True @@ -400,19 +476,31 @@ def crawl_metadata(self): logger_func=LOG.debug, ) - imds_md = self.get_imds_data_with_api_fallback( - self.fallback_interface, retries=10 - ) + # If we read OVF from attached media, we are provisioning. If OVF + # is not found, we are probably provisioning on a system which does + # not have UDF support. In either case, require IMDS metadata. + # If we require IMDS metadata, try harder to obtain networking, waiting + # for at least 20 minutes. Otherwise only wait 5 minutes. + requires_imds_metadata = bool(iso_dev) or not ovf_is_accessible + timeout_minutes = 5 if requires_imds_metadata else 20 + try: + self._setup_ephemeral_networking(timeout_minutes=timeout_minutes) + except NoDHCPLeaseError: + pass - # reset _fallback_interface so that if the code enters reprovisioning - # flow, it will force re-evaluation of new fallback nic. - self._fallback_interface = None + if self._is_ephemeral_networking_up(): + imds_md = self.get_imds_data_with_api_fallback(retries=10) + else: + imds_md = {} if not imds_md and not ovf_is_accessible: msg = "No OVF or IMDS available" report_diagnostic_event(msg) raise sources.InvalidMetaDataException(msg) + self.iso_dev = iso_dev + + # Refresh PPS type using metadata. pps_type = self._determine_pps_type(cfg, imds_md) if pps_type != PPSType.NONE: if util.is_FreeBSD(): @@ -427,9 +515,7 @@ def crawl_metadata(self): md, userdata_raw, cfg, files = self._reprovision() # fetch metadata again as it has changed after reprovisioning - imds_md = self.get_imds_data_with_api_fallback( - self.fallback_interface, retries=10 - ) + imds_md = self.get_imds_data_with_api_fallback(retries=10) self.seed = metadata_source crawled_data.update( @@ -509,26 +595,8 @@ def crawl_metadata(self): if pps_type != PPSType.NONE: LOG.info("Reporting ready to Azure after getting ReprovisionData") - use_cached_ephemeral = ( - self.distro.networking.is_up(self.fallback_interface) - and self._ephemeral_dhcp_ctx - and self._ephemeral_dhcp_ctx.lease - ) - if use_cached_ephemeral: - self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) - self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral - else: - try: - with EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) as lease: - self._report_ready(lease=lease) - except Exception as e: - report_diagnostic_event( - "exception while reporting ready: %s" % e, - logger_func=LOG.error, - ) - raise + self._report_ready() + return crawled_data def _is_platform_viable(self): @@ -575,6 +643,8 @@ def _get_data(self): description=DEFAULT_REPORT_FAILURE_USER_VISIBLE_MESSAGE ) return False + finally: + self._teardown_ephemeral_networking() if ( self.distro @@ -626,7 +696,7 @@ def _get_data(self): @azure_ds_telemetry_reporter def get_imds_data_with_api_fallback( self, - fallback_nic, + *, retries, md_type=MetadataType.ALL, exc_cb=retry_on_url_exc, @@ -643,7 +713,6 @@ def get_imds_data_with_api_fallback( try: LOG.info("Attempting IMDS api-version: %s", IMDS_VER_WANT) return get_metadata_from_imds( - fallback_nic=fallback_nic, retries=0, md_type=md_type, api_version=IMDS_VER_WANT, @@ -660,7 +729,6 @@ def get_imds_data_with_api_fallback( LOG.info("Using IMDS api-version: %s", IMDS_VER_MIN) return get_metadata_from_imds( - fallback_nic=fallback_nic, retries=retries, md_type=md_type, api_version=IMDS_VER_MIN, @@ -891,12 +959,12 @@ def _create_report_ready_marker(self): ) @azure_ds_telemetry_reporter - def _report_ready_for_pps(self, lease: dict) -> None: + def _report_ready_for_pps(self) -> None: """Report ready for PPS, creating the marker file upon completion. :raises sources.InvalidMetaDataException: On error reporting ready. """ - report_ready_succeeded = self._report_ready(lease=lease) + report_ready_succeeded = self._report_ready() if not report_ready_succeeded: msg = "Failed reporting ready while in the preprovisioning pool." report_diagnostic_event(msg, logger_func=LOG.error) @@ -919,22 +987,8 @@ def _check_if_nic_is_primary(self, ifname): # For now, only a VM's primary NIC can contact IMDS and WireServer. If # DHCP fails for a NIC, we have no mechanism to determine if the NIC is - # primary or secondary. In this case, the desired behavior is to fail - # VM provisioning if there is any DHCP failure when trying to determine - # the primary NIC. - try: - dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter, iface=ifname - ) - dhcp_ctx.obtain_lease() - except Exception as e: - report_diagnostic_event( - "Giving up. Failed to obtain dhcp lease " - "for %s when attempting to determine " - "primary NIC during reprovision due to %s" % (ifname, e), - logger_func=LOG.error, - ) - raise + # primary or secondary. In this case, retry DHCP until successful. + self._setup_ephemeral_networking(iface=ifname, timeout_minutes=20) # Retry polling network metadata for a limited duration only when the # calls fail due to network unreachable error or timeout. @@ -982,7 +1036,10 @@ def network_metadata_exc_cb(msg, exc): # could add several seconds of delay. try: imds_md = self.get_imds_data_with_api_fallback( - ifname, 0, MetadataType.NETWORK, network_metadata_exc_cb, True + retries=0, + md_type=MetadataType.NETWORK, + exc_cb=network_metadata_exc_cb, + infinite=True, ) except Exception as e: LOG.warning( @@ -992,25 +1049,21 @@ def network_metadata_exc_cb(msg, exc): ifname, e, ) - finally: - # If we are not the primary nic, then clean the dhcp context. - if not imds_md: - dhcp_ctx.clean_network() if imds_md: # Only primary NIC will get a response from IMDS. LOG.info("%s is the primary nic", ifname) is_primary = True - # If primary, set ephemeral dhcp ctx so we can report ready - self._ephemeral_dhcp_ctx = dhcp_ctx - # Set the expected nic count based on the response received. expected_nic_count = len(imds_md["interface"]) report_diagnostic_event( "Expected nic count: %d" % expected_nic_count, logger_func=LOG.info, ) + else: + # If we are not the primary nic, then clean the dhcp context. + self._teardown_ephemeral_networking() return is_primary, expected_nic_count @@ -1097,19 +1150,7 @@ def _wait_for_all_nics_ready(self): # The nic of the preprovisioned vm gets hot-detached as soon as # we report ready. So no need to save the dhcp context. if not os.path.isfile(REPORTED_READY_MARKER_FILE): - try: - with EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) as lease: - self._report_ready_for_pps(lease) - except NoDHCPLeaseError as error: - report_diagnostic_event( - "DHCP failed while in provisioning pool", - logger_func=LOG.warning, - ) - raise sources.InvalidMetaDataException( - "Failed to report ready while in provisioning pool." - ) from error + self._report_ready_for_pps() has_nic_been_detached = bool( os.path.isfile(REPROVISION_NIC_DETACHED_MARKER_FILE) @@ -1117,6 +1158,7 @@ def _wait_for_all_nics_ready(self): if not has_nic_been_detached: LOG.info("NIC has not been detached yet.") + self._teardown_ephemeral_networking() self._wait_for_nic_detach(nl_sock) # If we know that the preprovisioned nic has been detached, and we @@ -1193,31 +1235,23 @@ def exc_cb(msg, exception): ) return False - # When the interface is hot-attached, we would have already - # done dhcp and set the dhcp context. In that case, skip - # the attempt to do dhcp. - msg = ( - "Unexpected error. Dhcp context is not expected to be already " - "set when we need to wait for vnet switch" - ) - if self._ephemeral_dhcp_ctx is not None and report_ready: - report_diagnostic_event(msg, logger_func=LOG.error) - raise RuntimeError(msg) - if report_ready: + # Networking must be up for netlink to detect + # media disconnect/connect. It may be down to due + # initial DHCP failure, if so check for it and retry, + # ensuring we flag it as required. + if not self._is_ephemeral_networking_up(): + self._setup_ephemeral_networking(timeout_minutes=20) + try: - self._ephemeral_dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) - lease = self._ephemeral_dhcp_ctx.obtain_lease() + iface = self._ephemeral_dhcp_ctx.iface + nl_sock = netlink.create_bound_netlink_socket() - self._report_ready_for_pps(lease) + self._report_ready_for_pps() - # Networking must remain up for netlink to detect - # media disconnect/connect. LOG.debug( "Wait for vnetswitch to happen on %s", - lease["interface"], + iface, ) with events.ReportEventStack( name="wait-for-media-disconnect-connect", @@ -1226,7 +1260,7 @@ def exc_cb(msg, exception): ): try: netlink.wait_for_media_disconnect_connect( - nl_sock, lease["interface"] + nl_sock, iface ) except AssertionError as e: report_diagnostic_event( @@ -1254,17 +1288,13 @@ def exc_cb(msg, exception): nl_sock.close() # Teardown old network configuration. - self._ephemeral_dhcp_ctx.clean_network() - self._ephemeral_dhcp_ctx = None + self._teardown_ephemeral_networking() while not reprovision_data: - if self._ephemeral_dhcp_ctx is None: - self._ephemeral_dhcp_ctx = EphemeralDHCPv4WithReporting( - azure_ds_reporter - ) + if not self._is_ephemeral_networking_up(): dhcp_attempts += 1 try: - self._ephemeral_dhcp_ctx.obtain_lease() + self._setup_ephemeral_networking(timeout_minutes=5) except NoDHCPLeaseError: continue @@ -1283,8 +1313,7 @@ def exc_cb(msg, exception): log_req_resp=False, ).contents except UrlError: - self._ephemeral_dhcp_ctx.clean_network() - self._ephemeral_dhcp_ctx = None + self._teardown_ephemeral_networking() continue report_diagnostic_event( @@ -1305,42 +1334,39 @@ def _report_failure(self, description: Optional[str] = None) -> bool: @param description: A description of the error encountered. @return: The success status of sending the failure signal. """ - unknown_245_key = "unknown-245" - - try: - if ( - self.distro.networking.is_up(self.fallback_interface) - and self._ephemeral_dhcp_ctx - and self._ephemeral_dhcp_ctx.lease - and unknown_245_key in self._ephemeral_dhcp_ctx.lease - ): + if self._is_ephemeral_networking_up(): + try: report_diagnostic_event( "Using cached ephemeral dhcp context " "to report failure to Azure", logger_func=LOG.debug, ) report_failure_to_fabric( - dhcp_opts=self._ephemeral_dhcp_ctx.lease[unknown_245_key], + dhcp_opts=self._wireserver_endpoint, description=description, ) - self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral return True - except Exception as e: - report_diagnostic_event( - "Failed to report failure using " - "cached ephemeral dhcp context: %s" % e, - logger_func=LOG.error, - ) + except Exception as e: + report_diagnostic_event( + "Failed to report failure using " + "cached ephemeral dhcp context: %s" % e, + logger_func=LOG.error, + ) try: report_diagnostic_event( "Using new ephemeral dhcp to report failure to Azure", logger_func=LOG.debug, ) - with EphemeralDHCPv4WithReporting(azure_ds_reporter) as lease: - report_failure_to_fabric( - dhcp_opts=lease[unknown_245_key], description=description - ) + self._teardown_ephemeral_networking() + try: + self._setup_ephemeral_networking(timeout_minutes=20) + except NoDHCPLeaseError: + # Reporting failure will fail, but it will emit telemetry. + pass + report_failure_to_fabric( + dhcp_opts=self._wireserver_endpoint, description=description + ) return True except Exception as e: report_diagnostic_event( @@ -1350,16 +1376,15 @@ def _report_failure(self, description: Optional[str] = None) -> bool: return False - def _report_ready(self, lease: dict) -> bool: + def _report_ready(self) -> bool: """Tells the fabric provisioning has completed. - @param lease: dhcp lease to use for sending the ready signal. @return: The success status of sending the ready signal. """ try: get_metadata_from_fabric( fallback_lease_file=None, - dhcp_opts=lease["unknown-245"], + dhcp_opts=self._wireserver_endpoint, iso_dev=self.iso_dev, ) return True @@ -1371,7 +1396,7 @@ def _report_ready(self, lease: dict) -> bool: ) return False - def _ppstype_from_imds(self, imds_md: dict = None) -> str: + def _ppstype_from_imds(self, imds_md: dict) -> Optional[str]: try: return imds_md["extended"]["compute"]["ppsType"] except Exception as e: @@ -1416,7 +1441,10 @@ def _write_reprovision_marker(self): ) def _reprovision(self): - """Initiate the reprovisioning workflow.""" + """Initiate the reprovisioning workflow. + + Ephemeral networking is up upon successful reprovisioning. + """ contents = self._poll_imds() with events.ReportEventStack( name="reprovisioning-read-azure-ovf", @@ -2206,7 +2234,6 @@ def _generate_network_config_from_fallback_config() -> dict: @azure_ds_telemetry_reporter def get_metadata_from_imds( - fallback_nic, retries, md_type=MetadataType.ALL, api_version=IMDS_VER_MIN, @@ -2215,12 +2242,9 @@ def get_metadata_from_imds( ): """Query Azure's instance metadata service, returning a dictionary. - If network is not up, setup ephemeral dhcp on fallback_nic to talk to the - IMDS. For more info on IMDS: + For more info on IMDS: https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service - @param fallback_nic: String. The name of the nic which requires active - network in order to query IMDS. @param retries: The number of retries of the IMDS_URL. @param md_type: Metadata type for IMDS request. @param api_version: IMDS api-version to use in the request. @@ -2234,18 +2258,14 @@ def get_metadata_from_imds( "func": _get_metadata_from_imds, "args": (retries, exc_cb, md_type, api_version, infinite), } - if net.is_up(fallback_nic): + try: return util.log_time(**kwargs) - else: - try: - with EphemeralDHCPv4WithReporting(azure_ds_reporter, fallback_nic): - return util.log_time(**kwargs) - except Exception as e: - report_diagnostic_event( - "exception while getting metadata: %s" % e, - logger_func=LOG.warning, - ) - raise + except Exception as e: + report_diagnostic_event( + "exception while getting metadata: %s" % e, + logger_func=LOG.warning, + ) + raise @azure_ds_telemetry_reporter @@ -2295,7 +2315,8 @@ def _get_metadata_from_imds( except json_decode_error as e: report_diagnostic_event( "Ignoring non-json IMDS instance metadata response: %s. " - "Loading non-json IMDS response failed: %s" % (str(response), e), + "Loading non-json IMDS response failed: %s" + % (response.contents, e), logger_func=LOG.warning, ) return {} diff --git a/cloudinit/sources/helpers/azure.py b/cloudinit/sources/helpers/azure.py index e0a1abb69ec..1a8cd34f22f 100755 --- a/cloudinit/sources/helpers/azure.py +++ b/cloudinit/sources/helpers/azure.py @@ -25,7 +25,6 @@ version, ) from cloudinit.net import dhcp -from cloudinit.net.dhcp import EphemeralDHCPv4 from cloudinit.reporting import events from cloudinit.settings import CFG_BUILTIN @@ -215,6 +214,7 @@ def report_diagnostic_event( msg: str, *, logger_func=None ) -> events.ReportingEvent: """Report a diagnostic event""" + print(msg) if callable(logger_func): logger_func(msg) evt = events.ReportingEvent( @@ -1243,21 +1243,4 @@ def dhcp_log_cb(out, err): ) -class EphemeralDHCPv4WithReporting(EphemeralDHCPv4): - def __init__(self, reporter, iface=None): - self.reporter = reporter - - super(EphemeralDHCPv4WithReporting, self).__init__( - iface=iface, dhcp_log_func=dhcp_log_cb - ) - - def __enter__(self): - with events.ReportEventStack( - name="obtain-dhcp-lease", - description="obtain dhcp lease", - parent=self.reporter, - ): - return super(EphemeralDHCPv4WithReporting, self).__enter__() - - # vi: ts=4 expandtab diff --git a/tests/unittests/sources/test_azure.py b/tests/unittests/sources/test_azure.py index 26fed1c4008..a6c43ea7c78 100644 --- a/tests/unittests/sources/test_azure.py +++ b/tests/unittests/sources/test_azure.py @@ -472,44 +472,15 @@ def setUp(self): dsaz.IMDS_URL ) - @mock.patch(MOCKPATH + "readurl") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_does_not_dhcp_if_network_is_up( - self, m_net_is_up, m_dhcp, m_readurl - ): - """Do not perform DHCP setup when nic is already up.""" - m_net_is_up.return_value = True - m_readurl.return_value = url_helper.StringResponse( - json.dumps(NETWORK_METADATA).encode("utf-8") - ) - self.assertEqual( - NETWORK_METADATA, dsaz.get_metadata_from_imds("eth9", retries=3) - ) - - m_net_is_up.assert_called_with("eth9") - m_dhcp.assert_not_called() - self.assertIn( - "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time - self.logs.getvalue(), - ) - @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_metadata_uses_instance_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_uses_instance_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) - dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.ALL - ) + dsaz.get_metadata_from_imds(retries=3, md_type=dsaz.MetadataType.ALL) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance?api-version=2019-06-01", exception_cb=mock.ANY, @@ -520,20 +491,15 @@ def test_get_metadata_uses_instance_url( ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_network_metadata_uses_network_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_network_metadata_uses_network_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing network metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.NETWORK + retries=3, md_type=dsaz.MetadataType.NETWORK ) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance/network?api-version=" @@ -546,19 +512,15 @@ def test_get_network_metadata_uses_network_url( ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_default_metadata_uses_instance_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) + def test_get_default_metadata_uses_instance_url(self, m_dhcp, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) - dsaz.get_metadata_from_imds("eth0", retries=3) + dsaz.get_metadata_from_imds(retries=3) m_readurl.assert_called_with( "http://169.254.169.254/metadata/instance?api-version=2019-06-01", exception_cb=mock.ANY, @@ -569,20 +531,14 @@ def test_get_default_metadata_uses_instance_url( ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up") - def test_get_metadata_uses_extended_url( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_uses_extended_url(self, m_readurl): """Make sure readurl is called with the correct url when accessing metadata""" - m_net_is_up.return_value = True m_readurl.return_value = url_helper.StringResponse( json.dumps(IMDS_NETWORK_METADATA).encode("utf-8") ) dsaz.get_metadata_from_imds( - "eth0", retries=3, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", @@ -598,23 +554,16 @@ def test_get_metadata_uses_extended_url( ) @mock.patch(MOCKPATH + "readurl", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_performs_dhcp_when_network_is_down( - self, m_net_is_up, m_dhcp, m_readurl - ): + def test_get_metadata_performs_dhcp_when_network_is_down(self, m_readurl): """Perform DHCP setup when nic is not up.""" - m_net_is_up.return_value = False m_readurl.return_value = url_helper.StringResponse( json.dumps(NETWORK_METADATA).encode("utf-8") ) self.assertEqual( - NETWORK_METADATA, dsaz.get_metadata_from_imds("eth9", retries=2) + NETWORK_METADATA, dsaz.get_metadata_from_imds(retries=2) ) - m_net_is_up.assert_called_with("eth9") - m_dhcp.assert_called_with(mock.ANY, "eth9") self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time self.logs.getvalue(), @@ -630,10 +579,7 @@ def test_get_metadata_performs_dhcp_when_network_is_down( ) @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(MOCKPATH + "net.is_up", autospec=True) - def test_get_metadata_from_imds_empty_when_no_imds_present( - self, m_net_is_up, m_sleep - ): + def test_get_metadata_from_imds_empty_when_no_imds_present(self, m_sleep): """Return empty dict when IMDS network metadata is absent.""" httpretty.register_uri( httpretty.GET, @@ -642,11 +588,8 @@ def test_get_metadata_from_imds_empty_when_no_imds_present( status=404, ) - m_net_is_up.return_value = True # skips dhcp + self.assertEqual({}, dsaz.get_metadata_from_imds(retries=2)) - self.assertEqual({}, dsaz.get_metadata_from_imds("eth9", retries=2)) - - m_net_is_up.assert_called_with("eth9") self.assertEqual([mock.call(1), mock.call(1)], m_sleep.call_args_list) self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time @@ -655,9 +598,8 @@ def test_get_metadata_from_imds_empty_when_no_imds_present( @mock.patch("requests.Session.request") @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(MOCKPATH + "net.is_up", autospec=True) def test_get_metadata_from_imds_retries_on_timeout( - self, m_net_is_up, m_sleep, m_request + self, m_sleep, m_request ): """Retry IMDS network metadata on timeout errors.""" @@ -674,11 +616,8 @@ def retry_callback(request, uri, headers): body=retry_callback, ) - m_net_is_up.return_value = True # skips dhcp - - self.assertEqual({}, dsaz.get_metadata_from_imds("eth9", retries=3)) + self.assertEqual({}, dsaz.get_metadata_from_imds(retries=3)) - m_net_is_up.assert_called_with("eth9") self.assertEqual([mock.call(1)] * 3, m_sleep.call_args_list) self.assertIn( "Crawl of Azure Instance Metadata Service (IMDS) took", # log_time @@ -710,10 +649,12 @@ def setUp(self): self.m_dhcp = self.patches.enter_context( mock.patch.object( dsaz, - "EphemeralDHCPv4WithReporting", + "EphemeralDHCPv4", autospec=True, ) ) + self.m_dhcp.return_value.lease = {} + self.m_dhcp.return_value.iface = "eth4" self.m_get_metadata_from_imds = self.patches.enter_context( mock.patch.object( @@ -1254,7 +1195,8 @@ def test_crawl_metadata_waits_for_nic_on_savable_vms( "cloudinit.sources.helpers.netlink.wait_for_media_disconnect_connect" ) @mock.patch( - "cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready" + "cloudinit.sources.DataSourceAzure.DataSourceAzure._report_ready", + return_value=True, ) @mock.patch("cloudinit.sources.DataSourceAzure.readurl") def test_crawl_metadata_on_reprovision_reports_ready_using_lease( @@ -1268,34 +1210,24 @@ def test_crawl_metadata_on_reprovision_reports_ready_using_lease( data = {"ovfcontent": ovfenv, "sys_cfg": {}} dsrc = self._get_ds(data) - with mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: - - # For this mock, net should not be up, - # so that cached ephemeral won't be used. - # This is so that a NEW ephemeral dhcp lease will be discovered - # and used instead. - m_dsrc_distro_networking_is_up.return_value = False + lease = { + "interface": "eth9", + "fixed-address": "192.168.2.9", + "routers": "192.168.2.1", + "subnet-mask": "255.255.255.0", + "unknown-245": "624c3620", + } + self.m_dhcp.return_value.obtain_lease.return_value = lease + m_media_switch.return_value = None - lease = { - "interface": "eth9", - "fixed-address": "192.168.2.9", - "routers": "192.168.2.1", - "subnet-mask": "255.255.255.0", - "unknown-245": "624c3620", - } - self.m_dhcp.return_value.__enter__.return_value = lease - m_media_switch.return_value = None + reprovision_ovfenv = construct_valid_ovf_env() + m_readurl.return_value = url_helper.StringResponse( + reprovision_ovfenv.encode("utf-8") + ) - reprovision_ovfenv = construct_valid_ovf_env() - m_readurl.return_value = url_helper.StringResponse( - reprovision_ovfenv.encode("utf-8") - ) + dsrc.crawl_metadata() - dsrc.crawl_metadata() - self.assertEqual(2, m_report_ready.call_count) - m_report_ready.assert_called_with(lease=lease) + assert m_report_ready.mock_calls == [mock.call(), mock.call()] def test_waagent_d_has_0700_perms(self): # we expect /var/lib/waagent to be created 0700 @@ -1673,12 +1605,12 @@ def test_ovf_can_include_unicode(self): def test_dsaz_report_ready_returns_true_when_report_succeeds(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) - self.assertTrue(dsrc._report_ready(lease=mock.MagicMock())) + self.assertTrue(dsrc._report_ready()) def test_dsaz_report_ready_returns_false_and_does_not_propagate_exc(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) self.m_get_metadata_from_fabric.side_effect = Exception - self.assertFalse(dsrc._report_ready(lease=mock.MagicMock())) + self.assertFalse(dsrc._report_ready()) def test_dsaz_report_failure_returns_true_when_report_succeeds(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) @@ -1750,46 +1682,31 @@ def test_dsaz_report_failure_uses_cached_ephemeral_dhcp_ctx_lease(self): with mock.patch.object( dsrc, "crawl_metadata" ) as m_crawl_metadata, mock.patch.object( - dsrc, "_ephemeral_dhcp_ctx" - ) as m_ephemeral_dhcp_ctx, mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: + dsrc, "_wireserver_endpoint", return_value="test-ep" + ) as m_wireserver_endpoint: # mock crawl metadata failure to cause report failure m_crawl_metadata.side_effect = Exception - # setup mocks to allow using cached ephemeral dhcp lease - m_dsrc_distro_networking_is_up.return_value = True - test_lease_dhcp_option_245 = "test_lease_dhcp_option_245" - test_lease = {"unknown-245": test_lease_dhcp_option_245} - m_ephemeral_dhcp_ctx.lease = test_lease - self.assertTrue(dsrc._report_failure()) # ensure called with cached ephemeral dhcp lease option 245 self.m_report_failure_to_fabric.assert_called_once_with( - description=mock.ANY, dhcp_opts=test_lease_dhcp_option_245 + description=mock.ANY, dhcp_opts=m_wireserver_endpoint ) - # ensure cached ephemeral is cleaned - self.assertEqual(1, m_ephemeral_dhcp_ctx.clean_network.call_count) - def test_dsaz_report_failure_no_net_uses_new_ephemeral_dhcp_lease(self): dsrc = self._get_ds({"ovfcontent": construct_valid_ovf_env()}) - with mock.patch.object( - dsrc, "crawl_metadata" - ) as m_crawl_metadata, mock.patch.object( - dsrc.distro.networking, "is_up" - ) as m_dsrc_distro_networking_is_up: + with mock.patch.object(dsrc, "crawl_metadata") as m_crawl_metadata: # mock crawl metadata failure to cause report failure m_crawl_metadata.side_effect = Exception - # net is not up and cannot use cached ephemeral dhcp - m_dsrc_distro_networking_is_up.return_value = False - # setup ephemeral dhcp lease discovery mock test_lease_dhcp_option_245 = "test_lease_dhcp_option_245" - test_lease = {"unknown-245": test_lease_dhcp_option_245} - self.m_dhcp.return_value.__enter__.return_value = test_lease + test_lease = { + "unknown-245": test_lease_dhcp_option_245, + "interface": "eth0", + } + self.m_dhcp.return_value.obtain_lease.return_value = test_lease self.assertTrue(dsrc._report_failure()) @@ -2119,14 +2036,12 @@ def get_metadata_from_imds_side_eff(*args, **kwargs): assert m_get_metadata_from_imds.mock_calls == [ mock.call( - fallback_nic="eth9", retries=0, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", exc_cb=mock.ANY, ), mock.call( - fallback_nic="eth9", retries=10, md_type=dsaz.MetadataType.ALL, api_version="2019-06-01", @@ -2151,7 +2066,6 @@ def test_imds_api_version_wanted_exists(self, m_get_metadata_from_imds): assert m_get_metadata_from_imds.mock_calls == [ mock.call( - fallback_nic="eth9", retries=0, md_type=dsaz.MetadataType.ALL, api_version="2021-08-01", @@ -2862,11 +2776,10 @@ def test_nic_detach_writes_marker(self, m_writefile, m_detach): @mock.patch(MOCKPATH + "util.write_file", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_reports_ready_and_waits_for_detach( - self, m_detach, m_report_ready, m_dhcp, m_fallback_if, m_writefile + self, m_detach, m_report_ready, m_fallback_if, m_writefile ): """Report ready first and then wait for nic detach""" dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) @@ -2875,14 +2788,13 @@ def test_detect_nic_attach_reports_ready_and_waits_for_detach( self.assertEqual(1, m_report_ready.call_count) self.assertEqual(1, m_detach.call_count) self.assertEqual(1, m_writefile.call_count) - self.assertEqual(1, m_dhcp.call_count) m_writefile.assert_called_with( dsaz.REPORTED_READY_MARKER_FILE, mock.ANY ) @mock.patch("os.path.isfile") @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_skips_report_ready_when_marker_present( @@ -2903,7 +2815,7 @@ def isfile(key): @mock.patch("os.path.isfile") @mock.patch(MOCKPATH + "DataSourceAzure.fallback_interface") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting") + @mock.patch(MOCKPATH + "EphemeralDHCPv4") @mock.patch(MOCKPATH + "DataSourceAzure._report_ready") @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") def test_detect_nic_attach_skips_nic_detach_when_marker_present( @@ -2923,7 +2835,7 @@ def test_detect_nic_attach_skips_nic_detach_when_marker_present( @mock.patch("cloudinit.sources.helpers.netlink.wait_for_nic_attach_event") @mock.patch("cloudinit.sources.net.find_fallback_nic") @mock.patch(MOCKPATH + "get_metadata_from_imds") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") @mock.patch("os.path.isfile") def test_wait_for_nic_attach_if_no_fallback_interface( @@ -2967,7 +2879,7 @@ def test_wait_for_nic_attach_if_no_fallback_interface( @mock.patch("cloudinit.sources.helpers.netlink.wait_for_nic_attach_event") @mock.patch("cloudinit.sources.net.find_fallback_nic") @mock.patch(MOCKPATH + "DataSourceAzure.get_imds_data_with_api_fallback") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) @mock.patch(MOCKPATH + "DataSourceAzure._wait_for_nic_detach") @mock.patch("os.path.isfile") def test_wait_for_nic_attach_multinic_attach( @@ -3020,7 +2932,7 @@ def test_wait_for_nic_attach_multinic_attach( @mock.patch("cloudinit.url_helper.time.sleep", autospec=True) @mock.patch("requests.Session.request", autospec=True) - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) def test_check_if_nic_is_primary_retries_on_failures( self, m_dhcpv4, m_request, m_sleep ): @@ -3043,16 +2955,13 @@ def test_check_if_nic_is_primary_retries_on_failures( ] } - dhcp_ctx = mock.MagicMock(lease=lease) - dhcp_ctx.obtain_lease.return_value = lease - m_dhcpv4.return_value = dhcp_ctx - m_req = mock.Mock(content=json.dumps(md)) m_request.side_effect = [ requests.Timeout("Fake connection timeout"), requests.ConnectionError("Fake Network Unreachable"), m_req, ] + m_dhcpv4.return_value.lease = lease is_primary, expected_nic_count = dsa._check_if_nic_is_primary("eth0") self.assertEqual(True, is_primary) @@ -3162,13 +3071,14 @@ def test_wait_for_all_nics_ready_raises_if_socket_fails(self, m_socket): # dsa._wait_for_all_nics_ready() +@mock.patch("cloudinit.net.find_fallback_nic", return_value="eth9") @mock.patch("cloudinit.net.dhcp.EphemeralIPv4Network") @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") @mock.patch( "cloudinit.sources.helpers.netlink.wait_for_media_disconnect_connect" ) @mock.patch("requests.Session.request") -@mock.patch(MOCKPATH + "DataSourceAzure._report_ready") +@mock.patch(MOCKPATH + "DataSourceAzure._report_ready", return_value=True) class TestPreprovisioningPollIMDS(CiTestCase): def setUp(self): super(TestPreprovisioningPollIMDS, self).setUp() @@ -3185,6 +3095,7 @@ def test_poll_imds_re_dhcp_on_timeout( m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds will retry DHCP on IMDS timeout.""" report_file = self.tmp_path("report_marker", self.tmp) @@ -3220,8 +3131,9 @@ def fake_timeout_once(**kwargs): dsa = dsaz.DataSourceAzure({}, distro=mock.Mock(), paths=self.paths) with mock.patch(MOCKPATH + "REPORTED_READY_MARKER_FILE", report_file): dsa._poll_imds() - self.assertEqual(m_report_ready.call_count, 1) - m_report_ready.assert_called_with(lease=lease) + + assert m_report_ready.mock_calls == [mock.call()] + self.assertEqual(3, m_dhcp.call_count, "Expected 3 DHCP calls") self.assertEqual(4, self.tries, "Expected 4 total reads from IMDS") @@ -3234,6 +3146,7 @@ def test_poll_imds_skips_dhcp_if_ctx_present( m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds function should reuse the dhcp ctx if it is already present. This happens when we wait for nic to be hot-attached before @@ -3243,14 +3156,14 @@ def test_poll_imds_skips_dhcp_if_ctx_present( report_file = self.tmp_path("report_marker", self.tmp) m_isfile.return_value = True dsa = dsaz.DataSourceAzure({}, distro=None, paths=self.paths) - dsa._ephemeral_dhcp_ctx = "Dummy dhcp ctx" + dsa._ephemeral_dhcp_ctx = mock.Mock(lease={}) with mock.patch(MOCKPATH + "REPORTED_READY_MARKER_FILE", report_file): dsa._poll_imds() self.assertEqual(0, m_dhcp.call_count) self.assertEqual(0, m_media_switch.call_count) @mock.patch("os.path.isfile") - @mock.patch(MOCKPATH + "EphemeralDHCPv4WithReporting", autospec=True) + @mock.patch(MOCKPATH + "EphemeralDHCPv4", autospec=True) def test_poll_imds_does_dhcp_on_retries_if_ctx_present( self, m_ephemeral_dhcpv4, @@ -3260,6 +3173,7 @@ def test_poll_imds_does_dhcp_on_retries_if_ctx_present( m_media_switch, m_dhcp, m_net, + m_fallback, ): """The poll_imds function should reuse the dhcp ctx if it is already present. This happens when we wait for nic to be hot-attached before @@ -3292,7 +3206,13 @@ def fake_timeout_once(**kwargs): self.assertEqual(2, m_request.call_count) def test_does_not_poll_imds_report_ready_when_marker_file_exists( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should not call report ready when the reported ready marker file exists""" @@ -3314,7 +3234,13 @@ def test_does_not_poll_imds_report_ready_when_marker_file_exists( self.assertEqual(m_report_ready.call_count, 0) def test_poll_imds_report_ready_success_writes_marker_file( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should write the report_ready marker file if reporting ready succeeds""" @@ -3337,7 +3263,13 @@ def test_poll_imds_report_ready_success_writes_marker_file( self.assertTrue(os.path.exists(report_file)) def test_poll_imds_report_ready_failure_raises_exc_and_doesnt_write_marker( - self, m_report_ready, m_request, m_media_switch, m_dhcp, m_net + self, + m_report_ready, + m_request, + m_media_switch, + m_dhcp, + m_net, + m_fallback, ): """poll_imds should write the report_ready marker file if reporting ready succeeds"""