From 5ef45ee157ed56bc842b305c5f0a69a7f19ff6d7 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Sun, 12 Dec 2021 15:06:31 -0700 Subject: [PATCH 01/54] rebase --- cloudinit/url_helper.py | 166 ++++++++++++++++++++++++++--- tests/unittests/test_url_helper.py | 108 +++++++++++++++++++ 2 files changed, 261 insertions(+), 13 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index b827eba9140..ecea8df6dce 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,13 +12,19 @@ import json import os import time +from typing import Tuple, Any, Union, Callable, Mapping, Optional from email.utils import parsedate from errno import ENOENT from functools import partial from http.client import NOT_FOUND from itertools import count -from urllib.parse import quote, urlparse, urlunparse +from urllib.parse import urlparse, urlunparse, quote +from urllib3.exceptions import InsecureRequestWarning +from urllib3._collections import RecentlyUsedContainer +from urllib3.poolmanager import PoolKey, key_fn_by_scheme +from urllib3.util import Url +import urllib3 import requests from requests import exceptions @@ -153,6 +159,84 @@ def __init__(self, cause, code=None, headers=None, url=None): self.url = url +class HTTPConnectionPoolEarlyConnect(urllib3.HTTPConnectionPool): + """Allow socket connection prior to http request for dual-stack address + selection followed by socket reuse. + + A connection pool enables connection reuse to a single host. + """ + def _validate_conn(self, conn) -> None: + """ + Called right before a request is made, after the socket is created. + """ + super()._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if not conn.sock: + conn.connect() + + if not conn.is_verified: + print( + ( + "Unverified HTTPS request is being made to host '{}'. " + "Adding certificate verification is strongly advised. See: " + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" + "#tls-warnings".format(conn.host) + ), + InsecureRequestWarning, + ) + + def connect(self): + """Create a connection without creating a request + """ + # Make space in the pool and init an HTTPConnection + conn = self._get_conn() + + # connect and validate the connection immediately + self._validate_conn(conn) + + # insert connection into pool immediately for reuse + self._put_conn(conn) + return conn + + +# TODO: add https? +pool_classes_by_scheme = {"http": HTTPConnectionPoolEarlyConnect} + +class PoolManagerEarlyConnect(urllib3.PoolManager): + """Enable early connection to multiple "hosts" for dual-stack addresses + selection. + + Requests and urllib3 expose a high level API wherein the socket connection + occurs with the first http request. Allow initializing the connection + separately which enables dual stack selection (rfc 6555, aka "happy + eyeballs") prior to the http request + """ + proxy: Optional[Url] = None + proxy_config: Optional[Any] = None + + + def __init__( + self, + num_pools: int = 10, + headers: Optional[Mapping[str, str]] = None, + **connection_pool_kw: Any, + ) -> None: + super().__init__(headers) + self.connection_pool_kw = connection_pool_kw + + def dispose_func(p: Any) -> None: + p.close() + + self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPoolEarlyConnect] + self.pools = RecentlyUsedContainer(num_pools, dispose_func=dispose_func) + + # Locally set the pool classes and keys so other PoolManagers can + # override them. + self.pool_classes_by_scheme = pool_classes_by_scheme + self.key_fn_by_scheme = key_fn_by_scheme.copy() + + def _get_ssl_args(url, ssl_details): ssl_args = {} scheme = urlparse(url).scheme @@ -347,18 +431,74 @@ def _cb(url): raise excps[-1] -def wait_for_url( - urls, - max_wait=None, - timeout=None, - status_cb=None, - headers_cb=None, - headers_redact=None, - sleep_time=1, - exception_cb=None, - sleep_time_cb=None, - request_method=None, -): +def dual_stack( + func: Callable[..., Any], + *addresses: str, + stagger_delay: float = 0.150, + max_timeout: int = 10) -> Any: + """attempt connecting to multiple addresses asynchronously + + Run blocking func against two different addresses staggered with a + delay. The first call to return is returned from this function and + remaining unfinished calls will be canceled. + + TODO: + - replace print() w/logging + """ + return_result = None + + from concurrent.futures import ( + ThreadPoolExecutor, as_completed, TimeoutError) + + def _run_func(func, addr, delay=None): + """Execute func with optional delay + """ + if delay: + time.sleep(delay) + return func(addr) + + executor = ThreadPoolExecutor(max_workers=len(addresses)) + try: + futures = { + executor.submit( + _run_func, + func=func, + addr=addr, + delay=(None if i == 0 else stagger_delay) + ): addr for i, addr in enumerate(addresses) + } + + # handle the first function to complete from the threadpool executor + future = next(as_completed(futures, timeout=max_timeout)) + + returned_address = futures[future] + return_result = future.result() + return_exception = future.exception() + if return_exception: + print("Got exception %s" % return_exception) + raise return_exception + elif return_result: + print("Address {} returned" .format(returned_address)) + else: + print( + "Empty result for address: {}".format(returned_address)) + + # when max_timeout expires + except TimeoutError: + print("Timed out waiting for addresses: {}".format( + " ".join(map(str, addresses)))) + + # Executor doesn't provide kwargs for setting shutdown behavior + # in the constructor, otherwise the context manager would be preferred + # think they would take a PR implementing that? + finally: + executor.shutdown(wait=False, cancel_futures=True) + return return_result + + +def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, + headers_cb=None, headers_redact=None, sleep_time=1, + exception_cb=None, sleep_time_cb=None, request_method=None): """ urls: a list of urls to try max_wait: roughly the maximum time to wait before giving up diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 059809d9e27..ddbff2463bb 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -5,6 +5,9 @@ import httpretty import pytest import requests +import pytest +from functools import partial +from time import sleep, process_time from cloudinit import util, version from cloudinit.url_helper import ( @@ -250,4 +253,109 @@ def test_perform_retries_on_timeout(self): self.assertTrue(retry_on_url_exc(msg="", exc=myerror)) +from typing import Tuple, Callable + + +def _raise(a): + raise a + +def assert_time(func, max_time=1): + """Assert function time is bounded by a max (default=1s) + + The following async tests should canceled in under 1ms and have stagger + delay and max_ + It is possible that this could yield a false positive, but this should + basically never happen (esp under normal system load). + """ + start = process_time() + try: + out = func() + finally: + diff = (process_time() - start) + assert diff < max_time + return out + + +class TestDualStack: + + @pytest.mark.parametrize( + "func," + "addresses," + "stagger_delay," + "max_timeout," + "expected_val," + "expected_exc", + [ + # Assert order based on timeout + (lambda x:x, ("one", "two"), 1, 1, "one", None), + (lambda x:sleep(1) if x != "two" else x, ("one", "two"), 0, 1, "two", None), + (lambda x:sleep(1) if x != "tri" else x, ("one", "two", "tri"), 0, 1, "tri", None), + + # Assert timeout results in (None, None) + (lambda _:sleep(1), ("one", "two"), 1, 0, None, None), + + # Assert that exception in func is raised + (lambda _:1/0, ("one", "two"), 1, 1, None, ZeroDivisionError), + + # TODO: add httpretty tests + ]) + def test_dual_stack( + self, + func, + addresses, + stagger_delay, + max_timeout, + expected_val, + expected_exc): + """Assert various failure modes behave as expected + """ + + gen = partial( + dual_stack, + func, + *addresses, + stagger_delay=stagger_delay, + max_timeout=max_timeout) + if expected_exc: + with pytest.raises(expected_exc): + assert expected_val == assert_time(gen) + else: + assert expected_val == assert_time(gen) + + +class TestHTTPConnectionPoolEarlyConnect: + """ TODO: use httpretty, not google.com + """ + def test_instantiate(self): + pool = HTTPConnectionPoolEarlyConnect("google.com") + out = pool.urlopen("GET", "google.com", assert_same_host=False) + assert 200 == out.status + + def test_instantiate_init(self): + pool = HTTPConnectionPoolEarlyConnect("google.com") + pool.connect() + out = (pool.urlopen("GET", "google.com", assert_same_host=False)) + assert 200 == out.status + + +#class TestHTTPAdapterEarlyConnect: +# """ TODO: add tests for adapter class +# """ +# def test_instantiate_dualstack(self): +# assert False +# pool = HTTPConnectionPoolEarlyConnect("google.com") +# +# # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) +# first = "google.com" +# not_first = "yahoo.com" +# gen = partial( +# dual_stack, +# pool.connect, +# first, not_first, +# stagger_delay=1, +# max_timeout=1) +# out = assert_time(gen) +# out = pool.urlopen("GET", "google.com", assert_same_host=False) +# assert out + # vi: ts=4 expandtab From e980d81fa6e4d909cf2ed745c3b93ec0d0e99a96 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 11 Jan 2022 18:13:27 -0700 Subject: [PATCH 02/54] merge --- cloudinit/url_helper.py | 52 ++++++++++++++++--- tests/unittests/test_url_helper.py | 82 ++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 35 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index ecea8df6dce..623432114f3 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,21 +12,23 @@ import json import os import time -from typing import Tuple, Any, Union, Callable, Mapping, Optional +from typing import Any, Callable, Mapping, Optional, Dict from email.utils import parsedate from errno import ENOENT from functools import partial from http.client import NOT_FOUND from itertools import count + +import urllib3 from urllib.parse import urlparse, urlunparse, quote from urllib3.exceptions import InsecureRequestWarning from urllib3._collections import RecentlyUsedContainer from urllib3.poolmanager import PoolKey, key_fn_by_scheme from urllib3.util import Url -import urllib3 import requests -from requests import exceptions +from requests import exceptions, Session +from requests.adapters import HTTPAdapter from cloudinit import log as logging from cloudinit import version @@ -185,6 +187,7 @@ def _validate_conn(self, conn) -> None: ), InsecureRequestWarning, ) + print("HTTPConnectionPool: validate_conn(): {} ".format(conn)) def connect(self): """Create a connection without creating a request @@ -197,12 +200,14 @@ def connect(self): # insert connection into pool immediately for reuse self._put_conn(conn) + print("HTTPConnectionPool: connect(): {} ".format(conn)) return conn -# TODO: add https? +# TODO: add https? (doesn't looks like any sources use https currently) pool_classes_by_scheme = {"http": HTTPConnectionPoolEarlyConnect} + class PoolManagerEarlyConnect(urllib3.PoolManager): """Enable early connection to multiple "hosts" for dual-stack addresses selection. @@ -215,7 +220,6 @@ class PoolManagerEarlyConnect(urllib3.PoolManager): proxy: Optional[Url] = None proxy_config: Optional[Any] = None - def __init__( self, num_pools: int = 10, @@ -231,10 +235,42 @@ def dispose_func(p: Any) -> None: self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPoolEarlyConnect] self.pools = RecentlyUsedContainer(num_pools, dispose_func=dispose_func) - # Locally set the pool classes and keys so other PoolManagers can - # override them. + # Override pool classes from base class self.pool_classes_by_scheme = pool_classes_by_scheme self.key_fn_by_scheme = key_fn_by_scheme.copy() + print("PoolManager: __init__") + + def connection_from_url( + self, url: str, pool_kwargs: Optional[Dict[str, Any]] = None + ) -> HTTPConnectionPoolEarlyConnect: + """Init pool and connect + """ + pool = super().connection_from_url(url, pool_kwargs) + pool.connect() + print("PoolManager: connection_from_url: {}".format(url)) + return pool + + +class HTTPAdapterEarlyConnect(HTTPAdapter): + def init_poolmanager( + self, connections, maxsize, block=False): + self.poolmanager = PoolManagerEarlyConnect( + num_pools=connections, + maxsize=maxsize, + block=block) + print("HTTPAdapter: init_poolmanager") + + def connect(self, prefix) -> HTTPConnectionPoolEarlyConnect: + print("HTTPAdapter: init_poolmanager: {}".format(prefix)) + return self.poolmanager.connection_from_url(prefix) + + +class SessionEarlyConnect(Session): + def mount(self, prefix, adapter): + """Register a connection adapter and create the initial connection.""" + super().mount(prefix, adapter) + print("Session: mount(): {}, {}".format(prefix, adapter)) + return adapter.connect(prefix) def _get_ssl_args(url, ssl_details): @@ -453,8 +489,10 @@ def dual_stack( def _run_func(func, addr, delay=None): """Execute func with optional delay """ + if delay: time.sleep(delay) + print("dual_stack()._run_func(): {}({})".format(func, addr)) return func(addr) executor = ThreadPoolExecutor(max_workers=len(addresses)) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index ddbff2463bb..6e4e93ad534 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -8,16 +8,13 @@ import pytest from functools import partial from time import sleep, process_time +from typing import Tuple from cloudinit import util, version from cloudinit.url_helper import ( - NOT_FOUND, - REDACTED, - UrlError, - oauth_headers, - read_file_or_url, - retry_on_url_exc, -) + NOT_FOUND, UrlError, REDACTED, oauth_headers, read_file_or_url, + retry_on_url_exc, dual_stack, HTTPConnectionPoolEarlyConnect, + HTTPAdapterEarlyConnect) from tests.unittests.helpers import CiTestCase, mock, skipIf try: @@ -253,8 +250,6 @@ def test_perform_retries_on_timeout(self): self.assertTrue(retry_on_url_exc(msg="", exc=myerror)) -from typing import Tuple, Callable - def _raise(a): raise a @@ -338,24 +333,55 @@ def test_instantiate_init(self): assert 200 == out.status -#class TestHTTPAdapterEarlyConnect: -# """ TODO: add tests for adapter class -# """ -# def test_instantiate_dualstack(self): -# assert False -# pool = HTTPConnectionPoolEarlyConnect("google.com") -# -# # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) -# first = "google.com" -# not_first = "yahoo.com" -# gen = partial( -# dual_stack, -# pool.connect, -# first, not_first, -# stagger_delay=1, -# max_timeout=1) -# out = assert_time(gen) -# out = pool.urlopen("GET", "google.com", assert_same_host=False) -# assert out +def mount(session, adapter, delay_prefix=None, delay=1): + """Return closure for executing mount""" + def do_mount(prefix): + print("do_mount(): session.mount(prefix, adapter): {}.mount({}, {})".format(session, prefix, adapter)) + if delay_prefix == prefix: + sleep(delay) + session.mount(prefix, adapter) + return session.get(prefix) + return do_mount + + +class TestHTTPAdapterEarlyConnect: + """ TODO + """ + + def test_instantiate_dualstack(self): + s = requests.Session() + a = HTTPAdapterEarlyConnect() + + # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) + first = "http://www.google.com/" + not_first = "http://www.yahoo.com/" + gen = partial( + dual_stack, + mount(s, a), + first, + not_first, + stagger_delay=1, + max_timeout=1) + out = assert_time(gen) + assert 200 == out.status_code + assert first == out.url + + def test_instantiate_dualstack_second_first(self): + s = requests.Session() + a = HTTPAdapterEarlyConnect() + + # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) + first = "http://www.google.com/" + not_first = "http://www.example.com/" + gen = partial( + dual_stack, + mount(s, a, delay_prefix=first), + first, + not_first, + stagger_delay=0, + max_timeout=1) + out = assert_time(gen) + assert 200 == out.status_code + assert not_first == out.url # vi: ts=4 expandtab From 610e3e0a9f850d2f0060993897a05163b090ade2 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 15 Dec 2021 08:07:19 -0700 Subject: [PATCH 03/54] Add test demonstrating the connection and request as separate calls --- tests/unittests/test_url_helper.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 6e4e93ad534..e0bf92768e5 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -339,20 +339,22 @@ def do_mount(prefix): print("do_mount(): session.mount(prefix, adapter): {}.mount({}, {})".format(session, prefix, adapter)) if delay_prefix == prefix: sleep(delay) + print("mount: {}:{}".format(session, prefix)) session.mount(prefix, adapter) - return session.get(prefix) + return (session, prefix) return do_mount class TestHTTPAdapterEarlyConnect: - """ TODO + """ These two tests demonstrate asynchronously requesting two connections, + staggered, and returning a connection to the first address to respond, and + then making a request on that connection. """ def test_instantiate_dualstack(self): s = requests.Session() a = HTTPAdapterEarlyConnect() - # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) first = "http://www.google.com/" not_first = "http://www.yahoo.com/" gen = partial( @@ -362,7 +364,8 @@ def test_instantiate_dualstack(self): not_first, stagger_delay=1, max_timeout=1) - out = assert_time(gen) + (session, prefix) = assert_time(gen) + out = assert_time(partial(session.get, prefix)) assert 200 == out.status_code assert first == out.url @@ -380,7 +383,8 @@ def test_instantiate_dualstack_second_first(self): not_first, stagger_delay=0, max_timeout=1) - out = assert_time(gen) + (session, prefix) = assert_time(gen) + out = assert_time(partial(session.get, prefix)) assert 200 == out.status_code assert not_first == out.url From 8aecd35c8da8d50bb586178fae986a17a1315549 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 11 Jan 2022 18:15:21 -0700 Subject: [PATCH 04/54] merge --- cloudinit/ec2_utils.py | 81 +++++++++++----------------- cloudinit/url_helper.py | 85 +++++++++++++++++++++++++----- tests/unittests/test_url_helper.py | 14 +---- 3 files changed, 103 insertions(+), 77 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index d401955787e..5fb8e928441 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -219,56 +219,37 @@ def mcaller(url): return {} -def get_instance_metadata( - api_version="latest", - metadata_address="http://169.254.169.254", - ssl_details=None, - timeout=5, - retries=5, - leaf_decoder=None, - headers_cb=None, - headers_redact=None, - exception_cb=None, -): +# Todo: update these to use happy eyes +def get_instance_metadata(api_version='latest', + metadata_address='http://169.254.169.254', + ssl_details=None, timeout=5, retries=5, + leaf_decoder=None, headers_cb=None, + headers_redact=None, + exception_cb=None): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) - return _get_instance_metadata( - tree="meta-data/", - api_version=api_version, - metadata_address=metadata_address, - ssl_details=ssl_details, - timeout=timeout, - retries=retries, - leaf_decoder=leaf_decoder, - headers_redact=headers_redact, - headers_cb=headers_cb, - exception_cb=exception_cb, - ) - - -def get_instance_identity( - api_version="latest", - metadata_address="http://169.254.169.254", - ssl_details=None, - timeout=5, - retries=5, - leaf_decoder=None, - headers_cb=None, - headers_redact=None, - exception_cb=None, -): - return _get_instance_metadata( - tree="dynamic/instance-identity", - api_version=api_version, - metadata_address=metadata_address, - ssl_details=ssl_details, - timeout=timeout, - retries=retries, - leaf_decoder=leaf_decoder, - headers_redact=headers_redact, - headers_cb=headers_cb, - exception_cb=exception_cb, - ) - - + return _get_instance_metadata(tree='meta-data/', api_version=api_version, + metadata_address=metadata_address, + ssl_details=ssl_details, timeout=timeout, + retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, + headers_cb=headers_cb, + exception_cb=exception_cb) + + +# Todo: update these to use happy eyes +def get_instance_identity(api_version='latest', + metadata_address='http://169.254.169.254', + ssl_details=None, timeout=5, retries=5, + leaf_decoder=None, headers_cb=None, + headers_redact=None, + exception_cb=None): + return _get_instance_metadata(tree='dynamic/instance-identity', + api_version=api_version, + metadata_address=metadata_address, + ssl_details=ssl_details, timeout=timeout, + retries=retries, leaf_decoder=leaf_decoder, + headers_redact=headers_redact, + headers_cb=headers_cb, + exception_cb=exception_cb) # vi: ts=4 expandtab diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 623432114f3..df0d0dd42db 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -18,6 +18,7 @@ from functools import partial from http.client import NOT_FOUND from itertools import count +from collections import namedtuple import urllib3 from urllib.parse import urlparse, urlunparse, quote @@ -35,6 +36,13 @@ LOG = logging.getLogger(__name__) +# Store a url and its (optional) session together +UrlSession = namedtuple("UrlSession", ["url", "session"]) + +# Check if requests has ssl support (added in requests >= 0.8.8) +SSL_ENABLED = False +CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) +_REQ_VER = None REDACTED = "REDACTED" @@ -163,9 +171,12 @@ def __init__(self, cause, code=None, headers=None, url=None): class HTTPConnectionPoolEarlyConnect(urllib3.HTTPConnectionPool): """Allow socket connection prior to http request for dual-stack address - selection followed by socket reuse. + selection. - A connection pool enables connection reuse to a single host. + In HTTPConnectionPool "connections" are reused to a single host, but socket + doesn't actually get connected until the first http(s) request when using + HTTPConnectionPool. Allow early socket opening for negotiating address + selection. """ def _validate_conn(self, conn) -> None: """ @@ -212,10 +223,11 @@ class PoolManagerEarlyConnect(urllib3.PoolManager): """Enable early connection to multiple "hosts" for dual-stack addresses selection. + PoolManager handles HTTPConnectionPool allocation based on connection. Requests and urllib3 expose a high level API wherein the socket connection occurs with the first http request. Allow initializing the connection separately which enables dual stack selection (rfc 6555, aka "happy - eyeballs") prior to the http request + eyeballs") prior to the http request. """ proxy: Optional[Url] = None proxy_config: Optional[Any] = None @@ -243,7 +255,7 @@ def dispose_func(p: Any) -> None: def connection_from_url( self, url: str, pool_kwargs: Optional[Dict[str, Any]] = None ) -> HTTPConnectionPoolEarlyConnect: - """Init pool and connect + """Override parent class: Init pool and connect """ pool = super().connection_from_url(url, pool_kwargs) pool.connect() @@ -261,13 +273,30 @@ def init_poolmanager( print("HTTPAdapter: init_poolmanager") def connect(self, prefix) -> HTTPConnectionPoolEarlyConnect: + """Override parent class: Init pool and connect + """ print("HTTPAdapter: init_poolmanager: {}".format(prefix)) return self.poolmanager.connection_from_url(prefix) class SessionEarlyConnect(Session): + """Allow early connection for address negotiation prior to http request. + + Example: + ``` + # create session + s = requests.Session() + + # Mount adapter and initialize socket connection + s.mount('https://github.com/', HTTPAdapterEarlyConnect()) + + # Make http request and reuse socket from mount + s.get('https://github.com/') + ``` + """ def mount(self, prefix, adapter): - """Register a connection adapter and create the initial connection.""" + """Override parent class: Register a connection adapter and create the + initial connection.""" super().mount(prefix, adapter) print("Session: mount(): {}, {}".format(prefix, adapter)) return adapter.connect(prefix) @@ -340,6 +369,7 @@ def readurl( Typically GET, or POST. Default: POST if data is provided, GET otherwise. """ + print("in readurl") url = _cleanurl(url) req_args = { "url": url, @@ -409,6 +439,8 @@ def _cb(url): if session is None: session = requests.Session() + else: + print("reuse session") with session as sess: r = sess.request(**req_args) @@ -467,6 +499,32 @@ def _cb(url): raise excps[-1] +def get_session_to_first_response(*urls): + """ Helper takes list of urls and returns the first + """ + s = requests.Session() + a = HTTPAdapterEarlyConnect() + (session, url) = dual_stack( + mount(s, a), + *urls, + stagger_delay=0.150, + max_timeout=1) + return UrlSession(url, session) + + +def mount(session, adapter, delay_prefix=None, delay=1): + """Return closure for executing mount""" + def do_mount(prefix): + print("do_mount(): session.mount(prefix, adapter):" + "{}.mount({}, {})".format(session, prefix, adapter)) + if delay_prefix == prefix: + time.sleep(delay) + print("mount: {}:{}".format(session, prefix)) + session.mount(prefix, adapter) + return (session, prefix) + return do_mount + + def dual_stack( func: Callable[..., Any], *addresses: str, @@ -587,12 +645,15 @@ def timeup(max_wait, start_time): loop_n = 0 response = None + print("before loop") while True: + last_reason = None if sleep_time_cb is not None: sleep_time = sleep_time_cb(response, loop_n) else: sleep_time = int(loop_n / 5) + 1 - for url in urls: + for url_session in urls: + url = url_session.url now = time.time() if loop_n != 0: if timeup(max_wait, start_time): @@ -614,13 +675,9 @@ def timeup(max_wait, start_time): headers = {} response = readurl( - url, - headers=headers, - headers_redact=headers_redact, - timeout=timeout, - check_status=False, - request_method=request_method, - ) + url, headers=headers, headers_redact=headers_redact, + timeout=timeout, check_status=False, + request_method=request_method, session=url_session.session) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError( @@ -645,7 +702,6 @@ def timeup(max_wait, start_time): except Exception as e: reason = "unexpected error [%s]" % e url_exc = e - time_taken = int(time.time() - start_time) max_wait_str = "%ss" % max_wait if max_wait else "unlimited" status_msg = "Calling '%s' failed [%s/%s]: %s" % ( @@ -669,6 +725,7 @@ def timeup(max_wait, start_time): "Please wait %s seconds while we wait to try again", sleep_time ) time.sleep(sleep_time) + print("after loop") return False, None diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index e0bf92768e5..f0e8ab52a87 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -14,7 +14,7 @@ from cloudinit.url_helper import ( NOT_FOUND, UrlError, REDACTED, oauth_headers, read_file_or_url, retry_on_url_exc, dual_stack, HTTPConnectionPoolEarlyConnect, - HTTPAdapterEarlyConnect) + HTTPAdapterEarlyConnect, mount) from tests.unittests.helpers import CiTestCase, mock, skipIf try: @@ -333,18 +333,6 @@ def test_instantiate_init(self): assert 200 == out.status -def mount(session, adapter, delay_prefix=None, delay=1): - """Return closure for executing mount""" - def do_mount(prefix): - print("do_mount(): session.mount(prefix, adapter): {}.mount({}, {})".format(session, prefix, adapter)) - if delay_prefix == prefix: - sleep(delay) - print("mount: {}:{}".format(session, prefix)) - session.mount(prefix, adapter) - return (session, prefix) - return do_mount - - class TestHTTPAdapterEarlyConnect: """ These two tests demonstrate asynchronously requesting two connections, staggered, and returning a connection to the first address to respond, and From 59ca67b82b5d717f9907a26a9ed2f03063f96431 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 15 Dec 2021 13:25:56 -0700 Subject: [PATCH 05/54] add tests for get_session_to_first_response --- tests/unittests/test_url_helper.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index f0e8ab52a87..647f651185d 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -8,13 +8,12 @@ import pytest from functools import partial from time import sleep, process_time -from typing import Tuple from cloudinit import util, version from cloudinit.url_helper import ( NOT_FOUND, UrlError, REDACTED, oauth_headers, read_file_or_url, retry_on_url_exc, dual_stack, HTTPConnectionPoolEarlyConnect, - HTTPAdapterEarlyConnect, mount) + HTTPAdapterEarlyConnect, mount, get_session_to_first_response) from tests.unittests.helpers import CiTestCase, mock, skipIf try: @@ -272,6 +271,10 @@ def assert_time(func, max_time=1): class TestDualStack: + """Async testing suggestions welcome - these basically all rely on + sleep and time-bounded assertions for proving ordering. I assume there are + better ways of doing this. + """ @pytest.mark.parametrize( "func," @@ -339,6 +342,17 @@ class TestHTTPAdapterEarlyConnect: then making a request on that connection. """ + def test_instantiate_dualstack_helper(self): + """Test helper "get_session_to_first_response" + """ + first = "http://www.google.com/" + not_first = "http://www.yahoo.com/" + u = assert_time( + partial(get_session_to_first_response, first, not_first)) + out = assert_time(partial(u.session.get, u.url)) + assert 200 == out.status_code + assert first == out.url + def test_instantiate_dualstack(self): s = requests.Session() a = HTTPAdapterEarlyConnect() From 3a9e54aec7528d6689bcdad54295f9365a278493 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 11 Jan 2022 18:16:37 -0700 Subject: [PATCH 06/54] merge --- cloudinit/sources/DataSourceEc2.py | 6 +++++- cloudinit/url_helper.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index a030b4987b9..28263052dd3 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -55,7 +55,11 @@ class DataSourceEc2(sources.DataSource): # Default metadata urls that will be used if none are provided # They will be checked for 'resolveability' and some of the # following may be discarded if they do not resolve - metadata_urls = ["http://169.254.169.254", "http://instance-data.:8773"] + metadata_urls = [ + "http://[fd00:ec2::254]", + "http://169.254.169.254", + "http://instance-data.:8773" + ] # The minimum supported metadata_version from the ec2 metadata apis min_metadata_version = "2009-04-04" diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index df0d0dd42db..92cabbc652c 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -652,8 +652,7 @@ def timeup(max_wait, start_time): sleep_time = sleep_time_cb(response, loop_n) else: sleep_time = int(loop_n / 5) + 1 - for url_session in urls: - url = url_session.url + for url in urls: now = time.time() if loop_n != 0: if timeup(max_wait, start_time): @@ -677,7 +676,8 @@ def timeup(max_wait, start_time): response = readurl( url, headers=headers, headers_redact=headers_redact, timeout=timeout, check_status=False, - request_method=request_method, session=url_session.session) + request_method=request_method) + if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError( From c6fe5faaaa352ab2093efb5b6441f7eb3a872b25 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 11 Jan 2022 18:19:54 -0700 Subject: [PATCH 07/54] merge --- cloudinit/sources/DataSourceEc2.py | 14 +-- cloudinit/url_helper.py | 161 +++++++++++++++++------------ 2 files changed, 101 insertions(+), 74 deletions(-) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 28263052dd3..8327951527c 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -257,7 +257,7 @@ def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): exception_cb=self._imds_exception_cb, request_method=request_method, headers_redact=AWS_TOKEN_REDACT, - ) + connect_synchronously=False) except uhelp.UrlError: # We use the raised exception to interupt the retry loop. # Nothing else to do here. @@ -315,14 +315,10 @@ def wait_for_metadata_service(self): start_time = time.time() url, _ = uhelp.wait_for_url( - urls=urls, - max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, - status_cb=LOG.warning, - headers_redact=AWS_TOKEN_REDACT, - headers_cb=self._get_headers, - request_method=request_method, - ) + urls=urls, max_wait=url_params.max_wait_seconds, + timeout=url_params.timeout_seconds, status_cb=LOG.warning, + headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, + request_method=request_method, connect_synchronously=False) if url: metadata_address = url2base[url] diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 92cabbc652c..a1af72a6732 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -594,7 +594,8 @@ def _run_func(func, addr, delay=None): def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, headers_cb=None, headers_redact=None, sleep_time=1, - exception_cb=None, sleep_time_cb=None, request_method=None): + exception_cb=None, sleep_time_cb=None, request_method=None, + connect_synchronously=True): """ urls: a list of urls to try max_wait: roughly the maximum time to wait before giving up @@ -643,79 +644,109 @@ def timeup(max_wait, start_time): return False return (max_wait <= 0) or (time.time() - start_time > max_wait) + def read_url_handle_response(response, url): + if not response.contents: + reason = "empty response [%s]" % (response.code) + url_exc = UrlError(ValueError(reason), code=response.code, + headers=response.headers, url=url) + elif not response.ok(): + reason = "bad status code [%s]" % (response.code) + url_exc = UrlError(ValueError(reason), code=response.code, + headers=response.headers, url=url) + else: + reason = "" + url_exc = None + return (url_exc, reason) + + def readurl_handle_exceptions(url_reader, url): + reason = "" + url_exc = None + try: + + response = url_reader(url) + + url_exc, reason = read_url_handle_response(response, url) + print("handled response: {} {}".format(url_exc, reason)) + if not url_exc: + return url, response.contents + except UrlError as e: + reason = "request error [%s]" % e + url_exc = e + except Exception as e: + reason = "unexpected error [%s]" % e + url_exc = e + print("Exception: [{}]".format(url_exc)) + time_taken = int(time.time() - start_time) + max_wait_str = "%ss" % max_wait if max_wait else "unlimited" + status_msg = "Calling '%s' failed [%s/%s]: %s" % (url, + time_taken, + max_wait_str, + reason) + status_cb(status_msg) + if exception_cb: + # This can be used to alter the headers that will be sent + # in the future, for example this is what the MAAS datasource + # does. + exception_cb(msg=status_msg, exception=url_exc) + + def url_reader_serial(url): + if headers_cb is not None: + headers = headers_cb(url) + else: + headers = {} + + return readurl( + url, + headers=headers, + headers_redact=headers_redact, + timeout=timeout, + check_status=False, + request_method=request_method) + + url_reader_parallel = partial( + dual_stack, + url_reader_serial, + stagger_delay=0.150, + max_timeout=timeout + ) + + def read_url_serial(timeout): + for url in urls: + now = time.time() + if loop_n != 0: + if timeup(max_wait, start_time): + return + if (max_wait is not None and + timeout and (now + timeout > (start_time + max_wait))): + # shorten timeout to not run way over max_time + timeout = int((start_time + max_wait) - now) + + out = readurl_handle_exceptions(url_reader_serial, url) + if out: + return out + + def read_url_parallel(): + out = readurl_handle_exceptions(url_reader_parallel, urls) + if out: + return out + loop_n = 0 response = None print("before loop") while True: - last_reason = None if sleep_time_cb is not None: sleep_time = sleep_time_cb(response, loop_n) else: sleep_time = int(loop_n / 5) + 1 - for url in urls: - now = time.time() - if loop_n != 0: - if timeup(max_wait, start_time): - break - if ( - max_wait is not None - and timeout - and (now + timeout > (start_time + max_wait)) - ): - # shorten timeout to not run way over max_time - timeout = int((start_time + max_wait) - now) - reason = "" - url_exc = None - try: - if headers_cb is not None: - headers = headers_cb(url) - else: - headers = {} - - response = readurl( - url, headers=headers, headers_redact=headers_redact, - timeout=timeout, check_status=False, - request_method=request_method) - - if not response.contents: - reason = "empty response [%s]" % (response.code) - url_exc = UrlError( - ValueError(reason), - code=response.code, - headers=response.headers, - url=url, - ) - elif not response.ok(): - reason = "bad status code [%s]" % (response.code) - url_exc = UrlError( - ValueError(reason), - code=response.code, - headers=response.headers, - url=url, - ) - else: - return url, response.contents - except UrlError as e: - reason = "request error [%s]" % e - url_exc = e - except Exception as e: - reason = "unexpected error [%s]" % e - url_exc = e - time_taken = int(time.time() - start_time) - max_wait_str = "%ss" % max_wait if max_wait else "unlimited" - status_msg = "Calling '%s' failed [%s/%s]: %s" % ( - url, - time_taken, - max_wait_str, - reason, - ) - status_cb(status_msg) - if exception_cb: - # This can be used to alter the headers that will be sent - # in the future, for example this is what the MAAS datasource - # does. - exception_cb(msg=status_msg, exception=url_exc) + if connect_synchronously: + out = read_url_serial(timeout) + if out: + return out + else: + out = read_url_parallel() + if out: + return out if timeup(max_wait, start_time): break From 9ca2317313de1f1fb7b9eb761292eea85b3b06fd Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 11 Jan 2022 18:20:55 -0700 Subject: [PATCH 08/54] merge --- cloudinit/url_helper.py | 8 +------- tests/unittests/sources/test_ec2.py | 11 ++++++----- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index a1af72a6732..4e854f1d3d4 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -550,7 +550,6 @@ def _run_func(func, addr, delay=None): if delay: time.sleep(delay) - print("dual_stack()._run_func(): {}({})".format(func, addr)) return func(addr) executor = ThreadPoolExecutor(max_workers=len(addresses)) @@ -666,7 +665,6 @@ def readurl_handle_exceptions(url_reader, url): response = url_reader(url) url_exc, reason = read_url_handle_response(response, url) - print("handled response: {} {}".format(url_exc, reason)) if not url_exc: return url, response.contents except UrlError as e: @@ -675,7 +673,6 @@ def readurl_handle_exceptions(url_reader, url): except Exception as e: reason = "unexpected error [%s]" % e url_exc = e - print("Exception: [{}]".format(url_exc)) time_taken = int(time.time() - start_time) max_wait_str = "%ss" % max_wait if max_wait else "unlimited" status_msg = "Calling '%s' failed [%s/%s]: %s" % (url, @@ -726,13 +723,12 @@ def read_url_serial(timeout): return out def read_url_parallel(): - out = readurl_handle_exceptions(url_reader_parallel, urls) + out = readurl_handle_exceptions(url_reader_parallel, urls[0]) if out: return out loop_n = 0 response = None - print("before loop") while True: if sleep_time_cb is not None: sleep_time = sleep_time_cb(response, loop_n) @@ -752,11 +748,9 @@ def read_url_parallel(): break loop_n = loop_n + 1 - LOG.debug( "Please wait %s seconds while we wait to try again", sleep_time ) time.sleep(sleep_time) - print("after loop") return False, None diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 7c8a5ea5ef9..58e042f6bf5 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -524,11 +524,12 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): self.assertTrue(ds.get_data()) # Provide new revision of metadata that contains network data register_mock_metaserver( - "http://169.254.169.254/2009-04-04/meta-data/", DEFAULT_METADATA - ) - mac1 = "06:17:04:d7:26:09" # Defined in DEFAULT_METADATA - get_interface_mac_path = M_PATH_NET + "get_interfaces_by_mac" - ds.fallback_nic = "eth9" + 'http://[fd00:ec2::254]/2009-04-04/meta-data/', DEFAULT_METADATA) + register_mock_metaserver( + 'http://169.254.169.254/2009-04-04/meta-data/', DEFAULT_METADATA) + mac1 = '06:17:04:d7:26:09' # Defined in DEFAULT_METADATA + get_interface_mac_path = M_PATH_NET + 'get_interfaces_by_mac' + ds.fallback_nic = 'eth9' with mock.patch(get_interface_mac_path) as m_get_interfaces_by_mac: m_get_interfaces_by_mac.return_value = {mac1: "eth9"} nc = ds.network_config # Will re-crawl network metadata From ce7157b6fcfc15dd67561e1e5ba954cf8023f171 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 07:21:47 -0700 Subject: [PATCH 09/54] merge --- cloudinit/ec2_utils.py | 75 +++++++++----- cloudinit/sources/DataSourceEc2.py | 18 ++-- cloudinit/url_helper.py | 155 ++++++++++++++++++---------- tests/unittests/sources/test_ec2.py | 12 ++- tests/unittests/test_url_helper.py | 91 ++++++++++------ 5 files changed, 225 insertions(+), 126 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index 5fb8e928441..b158d62e738 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -220,36 +220,57 @@ def mcaller(url): # Todo: update these to use happy eyes -def get_instance_metadata(api_version='latest', - metadata_address='http://169.254.169.254', - ssl_details=None, timeout=5, retries=5, - leaf_decoder=None, headers_cb=None, - headers_redact=None, - exception_cb=None): +def get_instance_metadata( + api_version="latest", + metadata_address="http://169.254.169.254", + ssl_details=None, + timeout=5, + retries=5, + leaf_decoder=None, + headers_cb=None, + headers_redact=None, + exception_cb=None, +): # Note, 'meta-data' explicitly has trailing /. # this is required for CloudStack (LP: #1356855) - return _get_instance_metadata(tree='meta-data/', api_version=api_version, - metadata_address=metadata_address, - ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder, - headers_redact=headers_redact, - headers_cb=headers_cb, - exception_cb=exception_cb) + return _get_instance_metadata( + tree="meta-data/", + api_version=api_version, + metadata_address=metadata_address, + ssl_details=ssl_details, + timeout=timeout, + retries=retries, + leaf_decoder=leaf_decoder, + headers_redact=headers_redact, + headers_cb=headers_cb, + exception_cb=exception_cb, + ) # Todo: update these to use happy eyes -def get_instance_identity(api_version='latest', - metadata_address='http://169.254.169.254', - ssl_details=None, timeout=5, retries=5, - leaf_decoder=None, headers_cb=None, - headers_redact=None, - exception_cb=None): - return _get_instance_metadata(tree='dynamic/instance-identity', - api_version=api_version, - metadata_address=metadata_address, - ssl_details=ssl_details, timeout=timeout, - retries=retries, leaf_decoder=leaf_decoder, - headers_redact=headers_redact, - headers_cb=headers_cb, - exception_cb=exception_cb) +def get_instance_identity( + api_version="latest", + metadata_address="http://169.254.169.254", + ssl_details=None, + timeout=5, + retries=5, + leaf_decoder=None, + headers_cb=None, + headers_redact=None, + exception_cb=None, +): + return _get_instance_metadata( + tree="dynamic/instance-identity", + api_version=api_version, + metadata_address=metadata_address, + ssl_details=ssl_details, + timeout=timeout, + retries=retries, + leaf_decoder=leaf_decoder, + headers_redact=headers_redact, + headers_cb=headers_cb, + exception_cb=exception_cb, + ) + + # vi: ts=4 expandtab diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 8327951527c..9e7ac53bbc9 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -58,7 +58,7 @@ class DataSourceEc2(sources.DataSource): metadata_urls = [ "http://[fd00:ec2::254]", "http://169.254.169.254", - "http://instance-data.:8773" + "http://instance-data.:8773", ] # The minimum supported metadata_version from the ec2 metadata apis @@ -257,7 +257,8 @@ def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): exception_cb=self._imds_exception_cb, request_method=request_method, headers_redact=AWS_TOKEN_REDACT, - connect_synchronously=False) + connect_synchronously=False, + ) except uhelp.UrlError: # We use the raised exception to interupt the retry loop. # Nothing else to do here. @@ -315,10 +316,15 @@ def wait_for_metadata_service(self): start_time = time.time() url, _ = uhelp.wait_for_url( - urls=urls, max_wait=url_params.max_wait_seconds, - timeout=url_params.timeout_seconds, status_cb=LOG.warning, - headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, - request_method=request_method, connect_synchronously=False) + urls=urls, + max_wait=url_params.max_wait_seconds, + timeout=url_params.timeout_seconds, + status_cb=LOG.warning, + headers_redact=AWS_TOKEN_REDACT, + headers_cb=self._get_headers, + request_method=request_method, + connect_synchronously=False, + ) if url: metadata_address = url2base[url] diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 4e854f1d3d4..8868f346ee6 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -178,6 +178,7 @@ class HTTPConnectionPoolEarlyConnect(urllib3.HTTPConnectionPool): HTTPConnectionPool. Allow early socket opening for negotiating address selection. """ + def _validate_conn(self, conn) -> None: """ Called right before a request is made, after the socket is created. @@ -201,8 +202,7 @@ def _validate_conn(self, conn) -> None: print("HTTPConnectionPool: validate_conn(): {} ".format(conn)) def connect(self): - """Create a connection without creating a request - """ + """Create a connection without creating a request""" # Make space in the pool and init an HTTPConnection conn = self._get_conn() @@ -229,6 +229,7 @@ class PoolManagerEarlyConnect(urllib3.PoolManager): separately which enables dual stack selection (rfc 6555, aka "happy eyeballs") prior to the http request. """ + proxy: Optional[Url] = None proxy_config: Optional[Any] = None @@ -244,8 +245,12 @@ def __init__( def dispose_func(p: Any) -> None: p.close() - self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPoolEarlyConnect] - self.pools = RecentlyUsedContainer(num_pools, dispose_func=dispose_func) + self.pools: RecentlyUsedContainer[ + PoolKey, HTTPConnectionPoolEarlyConnect + ] + self.pools = RecentlyUsedContainer( + num_pools, dispose_func=dispose_func + ) # Override pool classes from base class self.pool_classes_by_scheme = pool_classes_by_scheme @@ -254,9 +259,8 @@ def dispose_func(p: Any) -> None: def connection_from_url( self, url: str, pool_kwargs: Optional[Dict[str, Any]] = None - ) -> HTTPConnectionPoolEarlyConnect: - """Override parent class: Init pool and connect - """ + ) -> HTTPConnectionPoolEarlyConnect: + """Override parent class: Init pool and connect""" pool = super().connection_from_url(url, pool_kwargs) pool.connect() print("PoolManager: connection_from_url: {}".format(url)) @@ -264,17 +268,14 @@ def connection_from_url( class HTTPAdapterEarlyConnect(HTTPAdapter): - def init_poolmanager( - self, connections, maxsize, block=False): + def init_poolmanager(self, connections, maxsize, block=False): self.poolmanager = PoolManagerEarlyConnect( - num_pools=connections, - maxsize=maxsize, - block=block) + num_pools=connections, maxsize=maxsize, block=block + ) print("HTTPAdapter: init_poolmanager") def connect(self, prefix) -> HTTPConnectionPoolEarlyConnect: - """Override parent class: Init pool and connect - """ + """Override parent class: Init pool and connect""" print("HTTPAdapter: init_poolmanager: {}".format(prefix)) return self.poolmanager.connection_from_url(prefix) @@ -294,6 +295,7 @@ class SessionEarlyConnect(Session): s.get('https://github.com/') ``` """ + def mount(self, prefix, adapter): """Override parent class: Register a connection adapter and create the initial connection.""" @@ -386,6 +388,20 @@ def readurl( req_args["timeout"] = max(float(timeout), 0) if headers_redact is None: headers_redact = [] + # It doesn't seem like config + # was added in older library versions (or newer ones either), thus we + # need to manually do the retries if it wasn't... + if CONFIG_ENABLED: + req_config = { + "store_cookies": False, + } + # Don't use the retry support built-in + # since it doesn't allow for 'sleep_times' + # in between tries.... + # if retries: + # req_config['max_retries'] = max(int(retries), 0) + req_args["config"] = req_config + print("manual_retries") manual_tries = 1 if retries: manual_tries = max(int(retries) + 1, 1) @@ -413,6 +429,7 @@ def _cb(url): # doesn't handle sleeping between tries... # Infinitely retry if infinite is True for i in count() if infinite else range(0, manual_tries): + print("retry") req_args["headers"] = headers_cb(url) filtered_req_args = {} for (k, v) in req_args.items(): @@ -500,36 +517,38 @@ def _cb(url): def get_session_to_first_response(*urls): - """ Helper takes list of urls and returns the first - """ + """Helper takes list of urls and returns the first""" s = requests.Session() a = HTTPAdapterEarlyConnect() (session, url) = dual_stack( - mount(s, a), - *urls, - stagger_delay=0.150, - max_timeout=1) + mount(s, a), *urls, stagger_delay=0.150, max_timeout=1 + ) return UrlSession(url, session) def mount(session, adapter, delay_prefix=None, delay=1): """Return closure for executing mount""" + def do_mount(prefix): - print("do_mount(): session.mount(prefix, adapter):" - "{}.mount({}, {})".format(session, prefix, adapter)) + print( + "do_mount(): session.mount(prefix, adapter):" + "{}.mount({}, {})".format(session, prefix, adapter) + ) if delay_prefix == prefix: time.sleep(delay) print("mount: {}:{}".format(session, prefix)) session.mount(prefix, adapter) return (session, prefix) + return do_mount def dual_stack( - func: Callable[..., Any], - *addresses: str, - stagger_delay: float = 0.150, - max_timeout: int = 10) -> Any: + func: Callable[..., Any], + *addresses: str, + stagger_delay: float = 0.150, + max_timeout: int = 10, +) -> Any: """attempt connecting to multiple addresses asynchronously Run blocking func against two different addresses staggered with a @@ -542,11 +561,13 @@ def dual_stack( return_result = None from concurrent.futures import ( - ThreadPoolExecutor, as_completed, TimeoutError) + ThreadPoolExecutor, + as_completed, + TimeoutError, + ) def _run_func(func, addr, delay=None): - """Execute func with optional delay - """ + """Execute func with optional delay""" if delay: time.sleep(delay) @@ -559,8 +580,9 @@ def _run_func(func, addr, delay=None): _run_func, func=func, addr=addr, - delay=(None if i == 0 else stagger_delay) - ): addr for i, addr in enumerate(addresses) + delay=(None if i == 0 else stagger_delay), + ): addr + for i, addr in enumerate(addresses) } # handle the first function to complete from the threadpool executor @@ -573,15 +595,17 @@ def _run_func(func, addr, delay=None): print("Got exception %s" % return_exception) raise return_exception elif return_result: - print("Address {} returned" .format(returned_address)) + print("Address {} returned".format(returned_address)) else: - print( - "Empty result for address: {}".format(returned_address)) + print("Empty result for address: {}".format(returned_address)) # when max_timeout expires except TimeoutError: - print("Timed out waiting for addresses: {}".format( - " ".join(map(str, addresses)))) + print( + "Timed out waiting for addresses: {}".format( + " ".join(map(str, addresses)) + ) + ) # Executor doesn't provide kwargs for setting shutdown behavior # in the constructor, otherwise the context manager would be preferred @@ -591,10 +615,19 @@ def _run_func(func, addr, delay=None): return return_result -def wait_for_url(urls, max_wait=None, timeout=None, status_cb=None, - headers_cb=None, headers_redact=None, sleep_time=1, - exception_cb=None, sleep_time_cb=None, request_method=None, - connect_synchronously=True): +def wait_for_url( + urls, + max_wait=None, + timeout=None, + status_cb=None, + headers_cb=None, + headers_redact=None, + sleep_time=1, + exception_cb=None, + sleep_time_cb=None, + request_method=None, + connect_synchronously=True, +): """ urls: a list of urls to try max_wait: roughly the maximum time to wait before giving up @@ -646,12 +679,20 @@ def timeup(max_wait, start_time): def read_url_handle_response(response, url): if not response.contents: reason = "empty response [%s]" % (response.code) - url_exc = UrlError(ValueError(reason), code=response.code, - headers=response.headers, url=url) + url_exc = UrlError( + ValueError(reason), + code=response.code, + headers=response.headers, + url=url, + ) elif not response.ok(): reason = "bad status code [%s]" % (response.code) - url_exc = UrlError(ValueError(reason), code=response.code, - headers=response.headers, url=url) + url_exc = UrlError( + ValueError(reason), + code=response.code, + headers=response.headers, + url=url, + ) else: reason = "" url_exc = None @@ -675,10 +716,12 @@ def readurl_handle_exceptions(url_reader, url): url_exc = e time_taken = int(time.time() - start_time) max_wait_str = "%ss" % max_wait if max_wait else "unlimited" - status_msg = "Calling '%s' failed [%s/%s]: %s" % (url, - time_taken, - max_wait_str, - reason) + status_msg = "Calling '%s' failed [%s/%s]: %s" % ( + url, + time_taken, + max_wait_str, + reason, + ) status_cb(status_msg) if exception_cb: # This can be used to alter the headers that will be sent @@ -698,13 +741,11 @@ def url_reader_serial(url): headers_redact=headers_redact, timeout=timeout, check_status=False, - request_method=request_method) + request_method=request_method, + ) url_reader_parallel = partial( - dual_stack, - url_reader_serial, - stagger_delay=0.150, - max_timeout=timeout + dual_stack, url_reader_serial, stagger_delay=0.150, max_timeout=timeout ) def read_url_serial(timeout): @@ -713,8 +754,11 @@ def read_url_serial(timeout): if loop_n != 0: if timeup(max_wait, start_time): return - if (max_wait is not None and - timeout and (now + timeout > (start_time + max_wait))): + if ( + max_wait is not None + and timeout + and (now + timeout > (start_time + max_wait)) + ): # shorten timeout to not run way over max_time timeout = int((start_time + max_wait) - now) @@ -748,6 +792,7 @@ def read_url_parallel(): break loop_n = loop_n + 1 + LOG.debug( "Please wait %s seconds while we wait to try again", sleep_time ) time.sleep(sleep_time) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 58e042f6bf5..4ffb1e26f43 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -524,12 +524,14 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): self.assertTrue(ds.get_data()) # Provide new revision of metadata that contains network data register_mock_metaserver( - 'http://[fd00:ec2::254]/2009-04-04/meta-data/', DEFAULT_METADATA) + "http://[fd00:ec2::254]/2009-04-04/meta-data/", DEFAULT_METADATA + ) register_mock_metaserver( - 'http://169.254.169.254/2009-04-04/meta-data/', DEFAULT_METADATA) - mac1 = '06:17:04:d7:26:09' # Defined in DEFAULT_METADATA - get_interface_mac_path = M_PATH_NET + 'get_interfaces_by_mac' - ds.fallback_nic = 'eth9' + "http://169.254.169.254/2009-04-04/meta-data/", DEFAULT_METADATA + ) + mac1 = "06:17:04:d7:26:09" # Defined in DEFAULT_METADATA + get_interface_mac_path = M_PATH_NET + "get_interfaces_by_mac" + ds.fallback_nic = "eth9" with mock.patch(get_interface_mac_path) as m_get_interfaces_by_mac: m_get_interfaces_by_mac.return_value = {mac1: "eth9"} nc = ds.network_config # Will re-crawl network metadata diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 647f651185d..4e28411b610 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -11,9 +11,18 @@ from cloudinit import util, version from cloudinit.url_helper import ( - NOT_FOUND, UrlError, REDACTED, oauth_headers, read_file_or_url, - retry_on_url_exc, dual_stack, HTTPConnectionPoolEarlyConnect, - HTTPAdapterEarlyConnect, mount, get_session_to_first_response) + NOT_FOUND, + UrlError, + REDACTED, + oauth_headers, + read_file_or_url, + retry_on_url_exc, + dual_stack, + HTTPConnectionPoolEarlyConnect, + HTTPAdapterEarlyConnect, + mount, + get_session_to_first_response, +) from tests.unittests.helpers import CiTestCase, mock, skipIf try: @@ -249,10 +258,10 @@ def test_perform_retries_on_timeout(self): self.assertTrue(retry_on_url_exc(msg="", exc=myerror)) - def _raise(a): raise a + def assert_time(func, max_time=1): """Assert function time is bounded by a max (default=1s) @@ -265,7 +274,7 @@ def assert_time(func, max_time=1): try: out = func() finally: - diff = (process_time() - start) + diff = process_time() - start assert diff < max_time return out @@ -285,35 +294,48 @@ class TestDualStack: "expected_exc", [ # Assert order based on timeout - (lambda x:x, ("one", "two"), 1, 1, "one", None), - (lambda x:sleep(1) if x != "two" else x, ("one", "two"), 0, 1, "two", None), - (lambda x:sleep(1) if x != "tri" else x, ("one", "two", "tri"), 0, 1, "tri", None), - + (lambda x: x, ("one", "two"), 1, 1, "one", None), + ( + lambda x: sleep(1) if x != "two" else x, + ("one", "two"), + 0, + 1, + "two", + None, + ), + ( + lambda x: sleep(1) if x != "tri" else x, + ("one", "two", "tri"), + 0, + 1, + "tri", + None, + ), # Assert timeout results in (None, None) - (lambda _:sleep(1), ("one", "two"), 1, 0, None, None), - + (lambda _: sleep(1), ("one", "two"), 1, 0, None, None), # Assert that exception in func is raised - (lambda _:1/0, ("one", "two"), 1, 1, None, ZeroDivisionError), - + (lambda _: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), # TODO: add httpretty tests - ]) + ], + ) def test_dual_stack( - self, - func, - addresses, - stagger_delay, - max_timeout, - expected_val, - expected_exc): - """Assert various failure modes behave as expected - """ + self, + func, + addresses, + stagger_delay, + max_timeout, + expected_val, + expected_exc, + ): + """Assert various failure modes behave as expected""" gen = partial( dual_stack, func, *addresses, stagger_delay=stagger_delay, - max_timeout=max_timeout) + max_timeout=max_timeout + ) if expected_exc: with pytest.raises(expected_exc): assert expected_val == assert_time(gen) @@ -322,8 +344,8 @@ def test_dual_stack( class TestHTTPConnectionPoolEarlyConnect: - """ TODO: use httpretty, not google.com - """ + """TODO: use httpretty, not google.com""" + def test_instantiate(self): pool = HTTPConnectionPoolEarlyConnect("google.com") out = pool.urlopen("GET", "google.com", assert_same_host=False) @@ -332,23 +354,23 @@ def test_instantiate(self): def test_instantiate_init(self): pool = HTTPConnectionPoolEarlyConnect("google.com") pool.connect() - out = (pool.urlopen("GET", "google.com", assert_same_host=False)) + out = pool.urlopen("GET", "google.com", assert_same_host=False) assert 200 == out.status class TestHTTPAdapterEarlyConnect: - """ These two tests demonstrate asynchronously requesting two connections, + """These two tests demonstrate asynchronously requesting two connections, staggered, and returning a connection to the first address to respond, and then making a request on that connection. """ def test_instantiate_dualstack_helper(self): - """Test helper "get_session_to_first_response" - """ + """Test helper "get_session_to_first_response" """ first = "http://www.google.com/" not_first = "http://www.yahoo.com/" u = assert_time( - partial(get_session_to_first_response, first, not_first)) + partial(get_session_to_first_response, first, not_first) + ) out = assert_time(partial(u.session.get, u.url)) assert 200 == out.status_code assert first == out.url @@ -365,7 +387,8 @@ def test_instantiate_dualstack(self): first, not_first, stagger_delay=1, - max_timeout=1) + max_timeout=1, + ) (session, prefix) = assert_time(gen) out = assert_time(partial(session.get, prefix)) assert 200 == out.status_code @@ -384,10 +407,12 @@ def test_instantiate_dualstack_second_first(self): first, not_first, stagger_delay=0, - max_timeout=1) + max_timeout=1, + ) (session, prefix) = assert_time(gen) out = assert_time(partial(session.get, prefix)) assert 200 == out.status_code assert not_first == out.url + # vi: ts=4 expandtab From dc348b2955cba260a8681b050b1108585759fb5c Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 07:46:59 -0700 Subject: [PATCH 10/54] isort --- cloudinit/url_helper.py | 17 ++++++++--------- tests/unittests/test_url_helper.py | 17 ++++++++--------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 8868f346ee6..94b78bfd2b2 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,25 +12,24 @@ import json import os import time -from typing import Any, Callable, Mapping, Optional, Dict +from collections import namedtuple from email.utils import parsedate from errno import ENOENT from functools import partial from http.client import NOT_FOUND from itertools import count -from collections import namedtuple +from typing import Any, Callable, Dict, Mapping, Optional +from urllib.parse import quote, urlparse, urlunparse +import requests import urllib3 -from urllib.parse import urlparse, urlunparse, quote -from urllib3.exceptions import InsecureRequestWarning +from requests import Session, exceptions +from requests.adapters import HTTPAdapter from urllib3._collections import RecentlyUsedContainer +from urllib3.exceptions import InsecureRequestWarning from urllib3.poolmanager import PoolKey, key_fn_by_scheme from urllib3.util import Url -import requests -from requests import exceptions, Session -from requests.adapters import HTTPAdapter - from cloudinit import log as logging from cloudinit import version @@ -562,8 +561,8 @@ def dual_stack( from concurrent.futures import ( ThreadPoolExecutor, - as_completed, TimeoutError, + as_completed, ) def _run_func(func, addr, delay=None): diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 4e28411b610..cb5bdcf0f01 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -1,27 +1,26 @@ # This file is part of cloud-init. See LICENSE file for license information. import logging +from functools import partial +from time import process_time, sleep import httpretty import pytest import requests -import pytest -from functools import partial -from time import sleep, process_time from cloudinit import util, version from cloudinit.url_helper import ( NOT_FOUND, - UrlError, REDACTED, + HTTPAdapterEarlyConnect, + HTTPConnectionPoolEarlyConnect, + UrlError, + dual_stack, + get_session_to_first_response, + mount, oauth_headers, read_file_or_url, retry_on_url_exc, - dual_stack, - HTTPConnectionPoolEarlyConnect, - HTTPAdapterEarlyConnect, - mount, - get_session_to_first_response, ) from tests.unittests.helpers import CiTestCase, mock, skipIf From 6aba1a6c628d213893e83f796cece38de7d60290 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 07:55:26 -0700 Subject: [PATCH 11/54] flake --- cloudinit/url_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 94b78bfd2b2..23887cc6351 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -192,8 +192,9 @@ def _validate_conn(self, conn) -> None: print( ( "Unverified HTTPS request is being made to host '{}'. " - "Adding certificate verification is strongly advised. See: " - "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" + "Adding certificate verification is strongly advised. " + "See: https://urllib3.readthedocs.io/en/latest/" + "advanced-usage.html" "#tls-warnings".format(conn.host) ), InsecureRequestWarning, From 77ef0a17c6b52de11d7f653b6c1c187cff160b9e Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 11:37:06 -0700 Subject: [PATCH 12/54] workaround for python versions prior to 3.9 --- cloudinit/url_helper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 23887cc6351..8d1e0ca61a8 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -8,6 +8,7 @@ # # This file is part of cloud-init. See LICENSE file for license information. +import sys import copy import json import os @@ -611,7 +612,11 @@ def _run_func(func, addr, delay=None): # in the constructor, otherwise the context manager would be preferred # think they would take a PR implementing that? finally: - executor.shutdown(wait=False, cancel_futures=True) + # python 3.9 allows canceling futures, which may save some cycles + if sys.version_info.major >= 3 and sys.version_info.minor > 9: + executor.shutdown(wait=False, cancel_futures=True) + else: + executor.shutdown(wait=False) return return_result From d1066d448799c5940c5ca451c8dee6c4f0a863fc Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 12:06:20 -0700 Subject: [PATCH 13/54] pacify isort, pylint --- cloudinit/url_helper.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 8d1e0ca61a8..344b339de88 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -8,10 +8,10 @@ # # This file is part of cloud-init. See LICENSE file for license information. -import sys import copy import json import os +import sys import time from collections import namedtuple from email.utils import parsedate @@ -246,9 +246,6 @@ def __init__( def dispose_func(p: Any) -> None: p.close() - self.pools: RecentlyUsedContainer[ - PoolKey, HTTPConnectionPoolEarlyConnect - ] self.pools = RecentlyUsedContainer( num_pools, dispose_func=dispose_func ) @@ -614,7 +611,9 @@ def _run_func(func, addr, delay=None): finally: # python 3.9 allows canceling futures, which may save some cycles if sys.version_info.major >= 3 and sys.version_info.minor > 9: - executor.shutdown(wait=False, cancel_futures=True) + executor.shutdown( # pylint: disable=E1123 + wait=False, cancel_futures=True + ) else: executor.shutdown(wait=False) return return_result From cffec7fd6fff9f367c8590641e81dcf14da2248e Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 12:20:03 -0700 Subject: [PATCH 14/54] test --- tests/unittests/sources/test_ec2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 4ffb1e26f43..271c11fb8b4 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -572,10 +572,11 @@ def test_ec2_get_instance_id_refreshes_identity_on_upgrade(self): ] + ds.extended_metadata_versions for ver in all_versions[:-1]: register_mock_metaserver( - "http://169.254.169.254/{0}/meta-data/instance-id".format(ver), - None, + "http://[fd00:ec2::254]/{0}/meta-data/instance-id".format(ver), + None ) - ds.metadata_address = "http://169.254.169.254" + + ds.metadata_address = "http://[fd00:ec2::254]" register_mock_metaserver( "{0}/{1}/meta-data/".format(ds.metadata_address, all_versions[-1]), DEFAULT_METADATA, From c06abbb85dd6d470c9f5d32d3c6108f03998bb29 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 12:22:38 -0700 Subject: [PATCH 15/54] flake --- cloudinit/url_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 344b339de88..fa892dc8c7d 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -28,7 +28,7 @@ from requests.adapters import HTTPAdapter from urllib3._collections import RecentlyUsedContainer from urllib3.exceptions import InsecureRequestWarning -from urllib3.poolmanager import PoolKey, key_fn_by_scheme +from urllib3.poolmanager import key_fn_by_scheme from urllib3.util import Url from cloudinit import log as logging From 88dd51e4a51b8e217e99bb5ed342e6861ff52256 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 12 Jan 2022 12:40:18 -0700 Subject: [PATCH 16/54] black --- tests/unittests/sources/test_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 271c11fb8b4..fe8758d6e6b 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -573,7 +573,7 @@ def test_ec2_get_instance_id_refreshes_identity_on_upgrade(self): for ver in all_versions[:-1]: register_mock_metaserver( "http://[fd00:ec2::254]/{0}/meta-data/instance-id".format(ver), - None + None, ) ds.metadata_address = "http://[fd00:ec2::254]" From acf9b4224fa0e98c7992f717a139a841ec55029e Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Thu, 13 Jan 2022 13:14:46 -0700 Subject: [PATCH 17/54] remove unneeded code --- cloudinit/ec2_utils.py | 2 - cloudinit/url_helper.py | 185 +---------------------------- tests/unittests/test_url_helper.py | 76 ------------ 3 files changed, 3 insertions(+), 260 deletions(-) diff --git a/cloudinit/ec2_utils.py b/cloudinit/ec2_utils.py index b158d62e738..d401955787e 100644 --- a/cloudinit/ec2_utils.py +++ b/cloudinit/ec2_utils.py @@ -219,7 +219,6 @@ def mcaller(url): return {} -# Todo: update these to use happy eyes def get_instance_metadata( api_version="latest", metadata_address="http://169.254.169.254", @@ -247,7 +246,6 @@ def get_instance_metadata( ) -# Todo: update these to use happy eyes def get_instance_identity( api_version="latest", metadata_address="http://169.254.169.254", diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index fa892dc8c7d..0aaac10b894 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -13,31 +13,23 @@ import os import sys import time -from collections import namedtuple +from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed from email.utils import parsedate from errno import ENOENT from functools import partial from http.client import NOT_FOUND from itertools import count -from typing import Any, Callable, Dict, Mapping, Optional +from typing import Any, Callable from urllib.parse import quote, urlparse, urlunparse import requests -import urllib3 -from requests import Session, exceptions -from requests.adapters import HTTPAdapter -from urllib3._collections import RecentlyUsedContainer -from urllib3.exceptions import InsecureRequestWarning -from urllib3.poolmanager import key_fn_by_scheme -from urllib3.util import Url +from requests import exceptions from cloudinit import log as logging from cloudinit import version LOG = logging.getLogger(__name__) -# Store a url and its (optional) session together -UrlSession = namedtuple("UrlSession", ["url", "session"]) # Check if requests has ssl support (added in requests >= 0.8.8) SSL_ENABLED = False @@ -169,139 +161,6 @@ def __init__(self, cause, code=None, headers=None, url=None): self.url = url -class HTTPConnectionPoolEarlyConnect(urllib3.HTTPConnectionPool): - """Allow socket connection prior to http request for dual-stack address - selection. - - In HTTPConnectionPool "connections" are reused to a single host, but socket - doesn't actually get connected until the first http(s) request when using - HTTPConnectionPool. Allow early socket opening for negotiating address - selection. - """ - - def _validate_conn(self, conn) -> None: - """ - Called right before a request is made, after the socket is created. - """ - super()._validate_conn(conn) - - # Force connect early to allow us to validate the connection. - if not conn.sock: - conn.connect() - - if not conn.is_verified: - print( - ( - "Unverified HTTPS request is being made to host '{}'. " - "Adding certificate verification is strongly advised. " - "See: https://urllib3.readthedocs.io/en/latest/" - "advanced-usage.html" - "#tls-warnings".format(conn.host) - ), - InsecureRequestWarning, - ) - print("HTTPConnectionPool: validate_conn(): {} ".format(conn)) - - def connect(self): - """Create a connection without creating a request""" - # Make space in the pool and init an HTTPConnection - conn = self._get_conn() - - # connect and validate the connection immediately - self._validate_conn(conn) - - # insert connection into pool immediately for reuse - self._put_conn(conn) - print("HTTPConnectionPool: connect(): {} ".format(conn)) - return conn - - -# TODO: add https? (doesn't looks like any sources use https currently) -pool_classes_by_scheme = {"http": HTTPConnectionPoolEarlyConnect} - - -class PoolManagerEarlyConnect(urllib3.PoolManager): - """Enable early connection to multiple "hosts" for dual-stack addresses - selection. - - PoolManager handles HTTPConnectionPool allocation based on connection. - Requests and urllib3 expose a high level API wherein the socket connection - occurs with the first http request. Allow initializing the connection - separately which enables dual stack selection (rfc 6555, aka "happy - eyeballs") prior to the http request. - """ - - proxy: Optional[Url] = None - proxy_config: Optional[Any] = None - - def __init__( - self, - num_pools: int = 10, - headers: Optional[Mapping[str, str]] = None, - **connection_pool_kw: Any, - ) -> None: - super().__init__(headers) - self.connection_pool_kw = connection_pool_kw - - def dispose_func(p: Any) -> None: - p.close() - - self.pools = RecentlyUsedContainer( - num_pools, dispose_func=dispose_func - ) - - # Override pool classes from base class - self.pool_classes_by_scheme = pool_classes_by_scheme - self.key_fn_by_scheme = key_fn_by_scheme.copy() - print("PoolManager: __init__") - - def connection_from_url( - self, url: str, pool_kwargs: Optional[Dict[str, Any]] = None - ) -> HTTPConnectionPoolEarlyConnect: - """Override parent class: Init pool and connect""" - pool = super().connection_from_url(url, pool_kwargs) - pool.connect() - print("PoolManager: connection_from_url: {}".format(url)) - return pool - - -class HTTPAdapterEarlyConnect(HTTPAdapter): - def init_poolmanager(self, connections, maxsize, block=False): - self.poolmanager = PoolManagerEarlyConnect( - num_pools=connections, maxsize=maxsize, block=block - ) - print("HTTPAdapter: init_poolmanager") - - def connect(self, prefix) -> HTTPConnectionPoolEarlyConnect: - """Override parent class: Init pool and connect""" - print("HTTPAdapter: init_poolmanager: {}".format(prefix)) - return self.poolmanager.connection_from_url(prefix) - - -class SessionEarlyConnect(Session): - """Allow early connection for address negotiation prior to http request. - - Example: - ``` - # create session - s = requests.Session() - - # Mount adapter and initialize socket connection - s.mount('https://github.com/', HTTPAdapterEarlyConnect()) - - # Make http request and reuse socket from mount - s.get('https://github.com/') - ``` - """ - - def mount(self, prefix, adapter): - """Override parent class: Register a connection adapter and create the - initial connection.""" - super().mount(prefix, adapter) - print("Session: mount(): {}, {}".format(prefix, adapter)) - return adapter.connect(prefix) - - def _get_ssl_args(url, ssl_details): ssl_args = {} scheme = urlparse(url).scheme @@ -369,7 +228,6 @@ def readurl( Typically GET, or POST. Default: POST if data is provided, GET otherwise. """ - print("in readurl") url = _cleanurl(url) req_args = { "url": url, @@ -399,7 +257,6 @@ def readurl( # if retries: # req_config['max_retries'] = max(int(retries), 0) req_args["config"] = req_config - print("manual_retries") manual_tries = 1 if retries: manual_tries = max(int(retries) + 1, 1) @@ -427,7 +284,6 @@ def _cb(url): # doesn't handle sleeping between tries... # Infinitely retry if infinite is True for i in count() if infinite else range(0, manual_tries): - print("retry") req_args["headers"] = headers_cb(url) filtered_req_args = {} for (k, v) in req_args.items(): @@ -454,8 +310,6 @@ def _cb(url): if session is None: session = requests.Session() - else: - print("reuse session") with session as sess: r = sess.request(**req_args) @@ -514,33 +368,6 @@ def _cb(url): raise excps[-1] -def get_session_to_first_response(*urls): - """Helper takes list of urls and returns the first""" - s = requests.Session() - a = HTTPAdapterEarlyConnect() - (session, url) = dual_stack( - mount(s, a), *urls, stagger_delay=0.150, max_timeout=1 - ) - return UrlSession(url, session) - - -def mount(session, adapter, delay_prefix=None, delay=1): - """Return closure for executing mount""" - - def do_mount(prefix): - print( - "do_mount(): session.mount(prefix, adapter):" - "{}.mount({}, {})".format(session, prefix, adapter) - ) - if delay_prefix == prefix: - time.sleep(delay) - print("mount: {}:{}".format(session, prefix)) - session.mount(prefix, adapter) - return (session, prefix) - - return do_mount - - def dual_stack( func: Callable[..., Any], *addresses: str, @@ -558,12 +385,6 @@ def dual_stack( """ return_result = None - from concurrent.futures import ( - ThreadPoolExecutor, - TimeoutError, - as_completed, - ) - def _run_func(func, addr, delay=None): """Execute func with optional delay""" diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index cb5bdcf0f01..0fc1096013c 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -12,12 +12,8 @@ from cloudinit.url_helper import ( NOT_FOUND, REDACTED, - HTTPAdapterEarlyConnect, - HTTPConnectionPoolEarlyConnect, UrlError, dual_stack, - get_session_to_first_response, - mount, oauth_headers, read_file_or_url, retry_on_url_exc, @@ -342,76 +338,4 @@ def test_dual_stack( assert expected_val == assert_time(gen) -class TestHTTPConnectionPoolEarlyConnect: - """TODO: use httpretty, not google.com""" - - def test_instantiate(self): - pool = HTTPConnectionPoolEarlyConnect("google.com") - out = pool.urlopen("GET", "google.com", assert_same_host=False) - assert 200 == out.status - - def test_instantiate_init(self): - pool = HTTPConnectionPoolEarlyConnect("google.com") - pool.connect() - out = pool.urlopen("GET", "google.com", assert_same_host=False) - assert 200 == out.status - - -class TestHTTPAdapterEarlyConnect: - """These two tests demonstrate asynchronously requesting two connections, - staggered, and returning a connection to the first address to respond, and - then making a request on that connection. - """ - - def test_instantiate_dualstack_helper(self): - """Test helper "get_session_to_first_response" """ - first = "http://www.google.com/" - not_first = "http://www.yahoo.com/" - u = assert_time( - partial(get_session_to_first_response, first, not_first) - ) - out = assert_time(partial(u.session.get, u.url)) - assert 200 == out.status_code - assert first == out.url - - def test_instantiate_dualstack(self): - s = requests.Session() - a = HTTPAdapterEarlyConnect() - - first = "http://www.google.com/" - not_first = "http://www.yahoo.com/" - gen = partial( - dual_stack, - mount(s, a), - first, - not_first, - stagger_delay=1, - max_timeout=1, - ) - (session, prefix) = assert_time(gen) - out = assert_time(partial(session.get, prefix)) - assert 200 == out.status_code - assert first == out.url - - def test_instantiate_dualstack_second_first(self): - s = requests.Session() - a = HTTPAdapterEarlyConnect() - - # out = (pool.urlopen("GET", "google.com", assert_same_host=False)) - first = "http://www.google.com/" - not_first = "http://www.example.com/" - gen = partial( - dual_stack, - mount(s, a, delay_prefix=first), - first, - not_first, - stagger_delay=0, - max_timeout=1, - ) - (session, prefix) = assert_time(gen) - out = assert_time(partial(session.get, prefix)) - assert 200 == out.status_code - assert not_first == out.url - - # vi: ts=4 expandtab From 6b59a885a2468ad68496008326411713cc5db34a Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 06:55:01 -0700 Subject: [PATCH 18/54] httppretty --- tests/unittests/test_url_helper.py | 46 +++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 0fc1096013c..e0a07d0cab9 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -17,6 +17,7 @@ oauth_headers, read_file_or_url, retry_on_url_exc, + wait_for_url ) from tests.unittests.helpers import CiTestCase, mock, skipIf @@ -275,9 +276,8 @@ def assert_time(func, max_time=1): class TestDualStack: - """Async testing suggestions welcome - these basically all rely on - sleep and time-bounded assertions for proving ordering. I assume there are - better ways of doing this. + """Async testing suggestions welcome - these all rely on + sleep and time-bounded assertions to prove ordering """ @pytest.mark.parametrize( @@ -290,6 +290,12 @@ class TestDualStack: [ # Assert order based on timeout (lambda x: x, ("one", "two"), 1, 1, "one", None), + + # Assert timeout results in (None, None) + (lambda _: sleep(1), ("one", "two"), 1, 0, None, None), + + # Assert that exception in func is raised + (lambda _: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), ( lambda x: sleep(1) if x != "two" else x, ("one", "two"), @@ -306,10 +312,6 @@ class TestDualStack: "tri", None, ), - # Assert timeout results in (None, None) - (lambda _: sleep(1), ("one", "two"), 1, 0, None, None), - # Assert that exception in func is raised - (lambda _: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), # TODO: add httpretty tests ], ) @@ -338,4 +340,34 @@ def test_dual_stack( assert expected_val == assert_time(gen) + +class TestUrlHelper: + uri_sleep="https://sleep/" + success="SUCCESS" + fail="FAIL" + + @classmethod + def response(cls, _, uri, response_headers): + if uri == cls.uri_sleep: + sleep(1) + return [500, response_headers, cls.fail] + return [200, response_headers, cls.success] + + @httpretty.activate + def test_order(self): + urls = ["https://first/", self.uri_sleep] + for url in urls: + httpretty.register_uri( + httpretty.GET, + url, + body=self.response) + url, response_contents = wait_for_url( + urls=urls, + max_wait=1, + timeout=1, + connect_synchronously=False + ) + assert url == urls[0] + assert response_contents + # vi: ts=4 expandtab From 1b3290b23af9c828d337b8d75499465ecc74a373 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 12:27:16 -0700 Subject: [PATCH 19/54] add generic async url testing code --- cloudinit/url_helper.py | 40 ++++++++++--------- tests/unittests/test_url_helper.py | 62 +++++++++++++++++++++--------- 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 0aaac10b894..b53d3b7b03f 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -19,7 +19,7 @@ from functools import partial from http.client import NOT_FOUND from itertools import count -from typing import Any, Callable +from typing import Any, Callable, List, Tuple from urllib.parse import quote, urlparse, urlunparse import requests @@ -370,10 +370,10 @@ def _cb(url): def dual_stack( func: Callable[..., Any], - *addresses: str, + addresses: List[str], stagger_delay: float = 0.150, - max_timeout: int = 10, -) -> Any: + max_wait: int = 10, +) -> Tuple: """attempt connecting to multiple addresses asynchronously Run blocking func against two different addresses staggered with a @@ -384,10 +384,10 @@ def dual_stack( - replace print() w/logging """ return_result = None + returned_address = None def _run_func(func, addr, delay=None): """Execute func with optional delay""" - if delay: time.sleep(delay) return func(addr) @@ -405,7 +405,7 @@ def _run_func(func, addr, delay=None): } # handle the first function to complete from the threadpool executor - future = next(as_completed(futures, timeout=max_timeout)) + future = next(as_completed(futures, timeout=max_wait)) returned_address = futures[future] return_result = future.result() @@ -414,11 +414,11 @@ def _run_func(func, addr, delay=None): print("Got exception %s" % return_exception) raise return_exception elif return_result: - print("Address {} returned".format(returned_address)) + print("Address {} returned: {}".format(returned_address, return_result)) else: print("Empty result for address: {}".format(returned_address)) - # when max_timeout expires + # when max_wait expires except TimeoutError: print( "Timed out waiting for addresses: {}".format( @@ -437,7 +437,7 @@ def _run_func(func, addr, delay=None): ) else: executor.shutdown(wait=False) - return return_result + return (returned_address, return_result) def wait_for_url( @@ -452,6 +452,7 @@ def wait_for_url( sleep_time_cb=None, request_method=None, connect_synchronously=True, + async_delay=0.150, ): """ urls: a list of urls to try @@ -469,6 +470,8 @@ def wait_for_url( sleep_time_cb: call method with 2 arguments (response, loop_n) that generates the next sleep time. request_method: indicate the type of HTTP request, GET, PUT, or POST + connect_synchonously: enable dual-stack support + async_delay: delay before parallel metadata requests, see RFC 6555 returns: tuple of (url, response contents), on failure, (False, None) the idea of this routine is to wait for the EC2 metadata service to @@ -523,12 +526,13 @@ def read_url_handle_response(response, url): url_exc = None return (url_exc, reason) - def readurl_handle_exceptions(url_reader, url): + def readurl_handle_exceptions(url_reader, urls): reason = "" url_exc = None + url = None try: - response = url_reader(url) + url, response = url_reader(urls) url_exc, reason = read_url_handle_response(response, url) if not url_exc: @@ -570,15 +574,15 @@ def url_reader_serial(url): ) url_reader_parallel = partial( - dual_stack, url_reader_serial, stagger_delay=0.150, max_timeout=timeout + dual_stack, url_reader_serial, stagger_delay=async_delay, max_wait=max_wait ) - def read_url_serial(timeout): + def read_url_serial(timeout) -> Tuple: for url in urls: now = time.time() if loop_n != 0: if timeup(max_wait, start_time): - return + return () if ( max_wait is not None and timeout @@ -589,12 +593,10 @@ def read_url_serial(timeout): out = readurl_handle_exceptions(url_reader_serial, url) if out: - return out + return (url, out) - def read_url_parallel(): - out = readurl_handle_exceptions(url_reader_parallel, urls[0]) - if out: - return out + def read_url_parallel() -> Tuple: + return readurl_handle_exceptions(url_reader_parallel, urls) loop_n = 0 response = None diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index e0a07d0cab9..57fd9b73d03 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -284,7 +284,7 @@ class TestDualStack: "func," "addresses," "stagger_delay," - "max_timeout," + "max_wait," "expected_val," "expected_exc", [ @@ -320,7 +320,7 @@ def test_dual_stack( func, addresses, stagger_delay, - max_timeout, + max_wait, expected_val, expected_exc, ): @@ -329,45 +329,71 @@ def test_dual_stack( gen = partial( dual_stack, func, - *addresses, + addresses, stagger_delay=stagger_delay, - max_timeout=max_timeout + max_wait=max_wait ) if expected_exc: with pytest.raises(expected_exc): - assert expected_val == assert_time(gen) + _, result = assert_time(gen) + assert expected_val == result else: - assert expected_val == assert_time(gen) - + _, result = assert_time(gen) + assert expected_val == result +ADDR1, ADDR2, SLEEP = "https://addr1/", "https://addr2/", "https://sleep/" class TestUrlHelper: - uri_sleep="https://sleep/" success="SUCCESS" fail="FAIL" @classmethod def response(cls, _, uri, response_headers): - if uri == cls.uri_sleep: + if uri == SLEEP: sleep(1) return [500, response_headers, cls.fail] return [200, response_headers, cls.success] + @pytest.mark.parametrize( + "addresses," + "expected_address_index," + "response,", + [ + # Use timeout to test ordering happens as expected + ((ADDR1, SLEEP), 0, "SUCCESS"), + ((SLEEP, ADDR2), 1, "SUCCESS"), + ((SLEEP, SLEEP, ADDR2), 2, "SUCCESS"), + ((ADDR1, SLEEP, SLEEP), 0, "SUCCESS"), + ((SLEEP, SLEEP, SLEEP), None, "SUCCESS"), + ]) @httpretty.activate - def test_order(self): - urls = ["https://first/", self.uri_sleep] - for url in urls: + def test_order(self, addresses, expected_address_index, response): + """Check that the first response is returned. Simulate a non-responding + endpoint with a response that has a one second sleep. If this test + proves flaky, increase sleep time. Since it is async, increasing + sleep time should not increase total test time, since execution + of subsequent tests will continue after the first response is received. + """ + for address in addresses: httpretty.register_uri( httpretty.GET, - url, + address, body=self.response) + + # Use async_delay=0.0 to avoid adding unnecessary time to tests + # In practice a value such as 0.150 is used url, response_contents = wait_for_url( - urls=urls, - max_wait=1, + urls=addresses, + max_wait=0.1, timeout=1, - connect_synchronously=False + connect_synchronously=False, + async_delay=0.0 ) - assert url == urls[0] - assert response_contents + if expected_address_index == None: + assert not url + assert not response_contents + else: + assert addresses[expected_address_index] == url + assert response.encode() == response_contents # vi: ts=4 expandtab From aeaf0791b2b45330327a223caee52aea09fa8b5b Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 12:33:40 -0700 Subject: [PATCH 20/54] fmt --- cloudinit/url_helper.py | 26 +++++++++++++++++--------- tests/unittests/test_url_helper.py | 29 +++++++++++++---------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index b53d3b7b03f..9e5ee52eb52 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -414,7 +414,11 @@ def _run_func(func, addr, delay=None): print("Got exception %s" % return_exception) raise return_exception elif return_result: - print("Address {} returned: {}".format(returned_address, return_result)) + print( + "Address {} returned: {}".format( + returned_address, return_result + ) + ) else: print("Empty result for address: {}".format(returned_address)) @@ -574,7 +578,10 @@ def url_reader_serial(url): ) url_reader_parallel = partial( - dual_stack, url_reader_serial, stagger_delay=async_delay, max_wait=max_wait + dual_stack, + url_reader_serial, + stagger_delay=async_delay, + max_wait=max_wait, ) def read_url_serial(timeout) -> Tuple: @@ -582,7 +589,7 @@ def read_url_serial(timeout) -> Tuple: now = time.time() if loop_n != 0: if timeup(max_wait, start_time): - return () + return (None, None) if ( max_wait is not None and timeout @@ -594,6 +601,7 @@ def read_url_serial(timeout) -> Tuple: out = readurl_handle_exceptions(url_reader_serial, url) if out: return (url, out) + return (None, None) def read_url_parallel() -> Tuple: return readurl_handle_exceptions(url_reader_parallel, urls) @@ -607,13 +615,13 @@ def read_url_parallel() -> Tuple: sleep_time = int(loop_n / 5) + 1 if connect_synchronously: - out = read_url_serial(timeout) - if out: - return out + url, _ = read_url_serial(timeout) + if url: + return url else: - out = read_url_parallel() - if out: - return out + url, _ = read_url_parallel() + if url: + return url if timeup(max_wait, start_time): break diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 57fd9b73d03..b0d0e6f093b 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -17,7 +17,7 @@ oauth_headers, read_file_or_url, retry_on_url_exc, - wait_for_url + wait_for_url, ) from tests.unittests.helpers import CiTestCase, mock, skipIf @@ -290,10 +290,8 @@ class TestDualStack: [ # Assert order based on timeout (lambda x: x, ("one", "two"), 1, 1, "one", None), - # Assert timeout results in (None, None) (lambda _: sleep(1), ("one", "two"), 1, 0, None, None), - # Assert that exception in func is raised (lambda _: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), ( @@ -331,7 +329,7 @@ def test_dual_stack( func, addresses, stagger_delay=stagger_delay, - max_wait=max_wait + max_wait=max_wait, ) if expected_exc: with pytest.raises(expected_exc): @@ -343,9 +341,11 @@ def test_dual_stack( ADDR1, ADDR2, SLEEP = "https://addr1/", "https://addr2/", "https://sleep/" + + class TestUrlHelper: - success="SUCCESS" - fail="FAIL" + success = "SUCCESS" + fail = "FAIL" @classmethod def response(cls, _, uri, response_headers): @@ -355,9 +355,7 @@ def response(cls, _, uri, response_headers): return [200, response_headers, cls.success] @pytest.mark.parametrize( - "addresses," - "expected_address_index," - "response,", + "addresses," "expected_address_index," "response,", [ # Use timeout to test ordering happens as expected ((ADDR1, SLEEP), 0, "SUCCESS"), @@ -365,7 +363,8 @@ def response(cls, _, uri, response_headers): ((SLEEP, SLEEP, ADDR2), 2, "SUCCESS"), ((ADDR1, SLEEP, SLEEP), 0, "SUCCESS"), ((SLEEP, SLEEP, SLEEP), None, "SUCCESS"), - ]) + ], + ) @httpretty.activate def test_order(self, addresses, expected_address_index, response): """Check that the first response is returned. Simulate a non-responding @@ -375,10 +374,7 @@ def test_order(self, addresses, expected_address_index, response): of subsequent tests will continue after the first response is received. """ for address in addresses: - httpretty.register_uri( - httpretty.GET, - address, - body=self.response) + httpretty.register_uri(httpretty.GET, address, body=self.response) # Use async_delay=0.0 to avoid adding unnecessary time to tests # In practice a value such as 0.150 is used @@ -387,13 +383,14 @@ def test_order(self, addresses, expected_address_index, response): max_wait=0.1, timeout=1, connect_synchronously=False, - async_delay=0.0 + async_delay=0.0, ) - if expected_address_index == None: + if expected_address_index is None: assert not url assert not response_contents else: assert addresses[expected_address_index] == url assert response.encode() == response_contents + # vi: ts=4 expandtab From 0015b3aa822af4e8294b473b76d431f9f15112f8 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 15:33:30 -0700 Subject: [PATCH 21/54] fix failing tests --- cloudinit/url_helper.py | 4 ++-- tests/unittests/sources/test_ec2.py | 7 ++----- tests/unittests/test_url_helper.py | 18 ++++++++++++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 9e5ee52eb52..e6f576addb6 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -615,11 +615,11 @@ def read_url_parallel() -> Tuple: sleep_time = int(loop_n / 5) + 1 if connect_synchronously: - url, _ = read_url_serial(timeout) + url = read_url_serial(timeout) if url: return url else: - url, _ = read_url_parallel() + url = read_url_parallel() if url: return url diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index fe8758d6e6b..192639ddf26 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -526,9 +526,6 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): register_mock_metaserver( "http://[fd00:ec2::254]/2009-04-04/meta-data/", DEFAULT_METADATA ) - register_mock_metaserver( - "http://169.254.169.254/2009-04-04/meta-data/", DEFAULT_METADATA - ) mac1 = "06:17:04:d7:26:09" # Defined in DEFAULT_METADATA get_interface_mac_path = M_PATH_NET + "get_interfaces_by_mac" ds.fallback_nic = "eth9" @@ -684,8 +681,8 @@ def test_aws_token_redacted(self): logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] logs_with_token = [log for log in all_logs if "API-TOKEN" in log] - self.assertEqual(1, len(logs_with_redacted_ttl)) - self.assertEqual(83, len(logs_with_redacted)) + self.assertEqual(5, len(logs_with_redacted_ttl)) + self.assertEqual(81, len(logs_with_redacted)) self.assertEqual(0, len(logs_with_token)) @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index b0d0e6f093b..cdcabe276ed 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -362,16 +362,20 @@ def response(cls, _, uri, response_headers): ((SLEEP, ADDR2), 1, "SUCCESS"), ((SLEEP, SLEEP, ADDR2), 2, "SUCCESS"), ((ADDR1, SLEEP, SLEEP), 0, "SUCCESS"), - ((SLEEP, SLEEP, SLEEP), None, "SUCCESS"), + ((SLEEP, SLEEP, SLEEP), None, ""), ], ) @httpretty.activate def test_order(self, addresses, expected_address_index, response): - """Check that the first response is returned. Simulate a non-responding - endpoint with a response that has a one second sleep. If this test - proves flaky, increase sleep time. Since it is async, increasing - sleep time should not increase total test time, since execution - of subsequent tests will continue after the first response is received. + """Check that the first response gets returned. Simulate a + non-responding endpoint with a response that has a one second sleep. + + If this test proves flaky, increase sleep time. Since it is async, + increasing sleep time for the non-responding endpoint should not + increase total test time, assuming async_delay=0 is used and at least + one non-sleep endpoint is registered with httpretty. + Subsequent tests will continue execution after the first response is + received. """ for address in addresses: httpretty.register_uri(httpretty.GET, address, body=self.response) @@ -385,6 +389,8 @@ def test_order(self, addresses, expected_address_index, response): connect_synchronously=False, async_delay=0.0, ) + + # Test for timeout (no responding endpoint) if expected_address_index is None: assert not url assert not response_contents From d7da42489b4b0d026a717a9ae7a25915322283f2 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 16:32:18 -0700 Subject: [PATCH 22/54] fix various tests --- cloudinit/sources/DataSourceEc2.py | 1 - cloudinit/url_helper.py | 12 +++++++---- tests/unittests/sources/test_ec2.py | 2 +- tests/unittests/test_url_helper.py | 31 ++++++++++++++++++++--------- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 9e7ac53bbc9..e7fe05fbd22 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -257,7 +257,6 @@ def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): exception_cb=self._imds_exception_cb, request_method=request_method, headers_redact=AWS_TOKEN_REDACT, - connect_synchronously=False, ) except uhelp.UrlError: # We use the raised exception to interupt the retry loop. diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index e6f576addb6..037040be243 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -540,7 +540,7 @@ def readurl_handle_exceptions(url_reader, urls): url_exc, reason = read_url_handle_response(response, url) if not url_exc: - return url, response.contents + return (url, response.contents) except UrlError as e: reason = "request error [%s]" % e url_exc = e @@ -561,8 +561,9 @@ def readurl_handle_exceptions(url_reader, urls): # in the future, for example this is what the MAAS datasource # does. exception_cb(msg=status_msg, exception=url_exc) + return (None, None) - def url_reader_serial(url): + def url_reader(url): if headers_cb is not None: headers = headers_cb(url) else: @@ -577,9 +578,12 @@ def url_reader_serial(url): request_method=request_method, ) + def url_reader_serial(url): + return (url, url_reader(url)) + url_reader_parallel = partial( dual_stack, - url_reader_serial, + url_reader, stagger_delay=async_delay, max_wait=max_wait, ) @@ -598,7 +602,7 @@ def read_url_serial(timeout) -> Tuple: # shorten timeout to not run way over max_time timeout = int((start_time + max_wait) - now) - out = readurl_handle_exceptions(url_reader_serial, url) + (url, out) = readurl_handle_exceptions(url_reader_serial, url) if out: return (url, out) return (None, None) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 192639ddf26..9bf6ba84def 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -681,7 +681,7 @@ def test_aws_token_redacted(self): logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] logs_with_token = [log for log in all_logs if "API-TOKEN" in log] - self.assertEqual(5, len(logs_with_redacted_ttl)) + self.assertEqual(1, len(logs_with_redacted_ttl)) self.assertEqual(81, len(logs_with_redacted)) self.assertEqual(0, len(logs_with_token)) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index cdcabe276ed..811629c96b3 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -350,7 +350,7 @@ class TestUrlHelper: @classmethod def response(cls, _, uri, response_headers): if uri == SLEEP: - sleep(1) + sleep(2) return [500, response_headers, cls.fail] return [200, response_headers, cls.success] @@ -362,7 +362,6 @@ def response(cls, _, uri, response_headers): ((SLEEP, ADDR2), 1, "SUCCESS"), ((SLEEP, SLEEP, ADDR2), 2, "SUCCESS"), ((ADDR1, SLEEP, SLEEP), 0, "SUCCESS"), - ((SLEEP, SLEEP, SLEEP), None, ""), ], ) @httpretty.activate @@ -384,19 +383,33 @@ def test_order(self, addresses, expected_address_index, response): # In practice a value such as 0.150 is used url, response_contents = wait_for_url( urls=addresses, - max_wait=0.1, + max_wait=1, timeout=1, connect_synchronously=False, async_delay=0.0, ) # Test for timeout (no responding endpoint) - if expected_address_index is None: - assert not url - assert not response_contents - else: - assert addresses[expected_address_index] == url - assert response.encode() == response_contents + assert addresses[expected_address_index] == url + assert response.encode() == response_contents + + @httpretty.activate + def test_timeout(self): + addresses = [SLEEP, SLEEP, SLEEP] + for address in addresses: + httpretty.register_uri(httpretty.GET, address, body=self.response) + + # Use async_delay=0.0 to avoid adding unnecessary time to tests + # In practice a value such as 0.150 is used + url, response_contents = wait_for_url( + urls=addresses, + max_wait=0, + timeout=0, + connect_synchronously=False, + async_delay=0.0, + ) + assert not url + assert not response_contents # vi: ts=4 expandtab From b28912b8b051f4557554d80b4af1127132f28439 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 21:03:28 -0700 Subject: [PATCH 23/54] I heard you like callbacks so I put some callbacks in your callbacks --- cloudinit/url_helper.py | 104 +++++++++++++---------------- tests/unittests/test_url_helper.py | 24 ++++--- 2 files changed, 60 insertions(+), 68 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 037040be243..03a707796cf 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -374,14 +374,12 @@ def dual_stack( stagger_delay: float = 0.150, max_wait: int = 10, ) -> Tuple: - """attempt connecting to multiple addresses asynchronously + """execute multiple callbacks in parallel Run blocking func against two different addresses staggered with a delay. The first call to return is returned from this function and - remaining unfinished calls will be canceled. - - TODO: - - replace print() w/logging + remaining unfinished calls are canceled (or finish asynchronously on + python <3.9 """ return_result = None returned_address = None @@ -411,28 +409,21 @@ def _run_func(func, addr, delay=None): return_result = future.result() return_exception = future.exception() if return_exception: - print("Got exception %s" % return_exception) - raise return_exception - elif return_result: - print( - "Address {} returned: {}".format( - returned_address, return_result - ) + LOG.warning( + "Exception %s during request to %s", + return_exception, + returned_address, ) - else: - print("Empty result for address: {}".format(returned_address)) + raise return_exception + elif not return_result: + LOG.warning("Empty result for address %s", returned_address) # when max_wait expires except TimeoutError: - print( - "Timed out waiting for addresses: {}".format( - " ".join(map(str, addresses)) - ) - ) + LOG.warning("Timed out waiting for addresses: %s", " ".join(addresses)) # Executor doesn't provide kwargs for setting shutdown behavior # in the constructor, otherwise the context manager would be preferred - # think they would take a PR implementing that? finally: # python 3.9 allows canceling futures, which may save some cycles if sys.version_info.major >= 3 and sys.version_info.minor > 9: @@ -474,7 +465,7 @@ def wait_for_url( sleep_time_cb: call method with 2 arguments (response, loop_n) that generates the next sleep time. request_method: indicate the type of HTTP request, GET, PUT, or POST - connect_synchonously: enable dual-stack support + connect_synchronously: if false, enables executing requests in parallel async_delay: delay before parallel metadata requests, see RFC 6555 returns: tuple of (url, response contents), on failure, (False, None) @@ -530,14 +521,11 @@ def read_url_handle_response(response, url): url_exc = None return (url_exc, reason) - def readurl_handle_exceptions(url_reader, urls): + def read_url_handle_exceptions(url_reader_cb, urls): reason = "" - url_exc = None url = None try: - - url, response = url_reader(urls) - + url, response = url_reader_cb(urls) url_exc, reason = read_url_handle_response(response, url) if not url_exc: return (url, response.contents) @@ -561,39 +549,32 @@ def readurl_handle_exceptions(url_reader, urls): # in the future, for example this is what the MAAS datasource # does. exception_cb(msg=status_msg, exception=url_exc) - return (None, None) - - def url_reader(url): - if headers_cb is not None: - headers = headers_cb(url) - else: - headers = {} + def read_url_cb(url): return readurl( url, - headers=headers, + headers={} if headers_cb is None else headers_cb(url), headers_redact=headers_redact, timeout=timeout, check_status=False, request_method=request_method, ) - def url_reader_serial(url): - return (url, url_reader(url)) + def read_url_serial(): + """iterate over list of urls, request each one and handle responses + and thrown exceptions individually per url + """ - url_reader_parallel = partial( - dual_stack, - url_reader, - stagger_delay=async_delay, - max_wait=max_wait, - ) + def url_reader_serial(url): + return (url, read_url_cb(url)) + + nonlocal timeout - def read_url_serial(timeout) -> Tuple: for url in urls: now = time.time() if loop_n != 0: if timeup(max_wait, start_time): - return (None, None) + return if ( max_wait is not None and timeout @@ -602,13 +583,27 @@ def read_url_serial(timeout) -> Tuple: # shorten timeout to not run way over max_time timeout = int((start_time + max_wait) - now) - (url, out) = readurl_handle_exceptions(url_reader_serial, url) + out = read_url_handle_exceptions(url_reader_serial, url) if out: - return (url, out) - return (None, None) + return out + + def read_url_parallel(): + """pass list of urls to dual_stack which sends requests in parallel + handle response and exceptions of the first endpoint to respond + """ + url_reader_parallel = partial( + dual_stack, + read_url_cb, + stagger_delay=async_delay, + max_wait=max_wait, + ) + out = read_url_handle_exceptions(url_reader_parallel, urls) + if out: + return out - def read_url_parallel() -> Tuple: - return readurl_handle_exceptions(url_reader_parallel, urls) + do_read_url = ( + read_url_serial if connect_synchronously else read_url_parallel + ) loop_n = 0 response = None @@ -618,14 +613,9 @@ def read_url_parallel() -> Tuple: else: sleep_time = int(loop_n / 5) + 1 - if connect_synchronously: - url = read_url_serial(timeout) - if url: - return url - else: - url = read_url_parallel() - if url: - return url + url = do_read_url() + if url: + return url if timeup(max_wait, start_time): break diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 811629c96b3..b51c33ad5d1 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -340,7 +340,9 @@ def test_dual_stack( assert expected_val == result -ADDR1, ADDR2, SLEEP = "https://addr1/", "https://addr2/", "https://sleep/" +ADDR1 = "https://addr1/" +SLEEP1 = "https://sleep1/" +SLEEP2 = "https://sleep2/" class TestUrlHelper: @@ -349,8 +351,8 @@ class TestUrlHelper: @classmethod def response(cls, _, uri, response_headers): - if uri == SLEEP: - sleep(2) + if uri in (SLEEP1, SLEEP2): + sleep(1) return [500, response_headers, cls.fail] return [200, response_headers, cls.success] @@ -358,10 +360,10 @@ def response(cls, _, uri, response_headers): "addresses," "expected_address_index," "response,", [ # Use timeout to test ordering happens as expected - ((ADDR1, SLEEP), 0, "SUCCESS"), - ((SLEEP, ADDR2), 1, "SUCCESS"), - ((SLEEP, SLEEP, ADDR2), 2, "SUCCESS"), - ((ADDR1, SLEEP, SLEEP), 0, "SUCCESS"), + ((ADDR1, SLEEP1), 0, "SUCCESS"), + ((SLEEP1, ADDR1), 1, "SUCCESS"), + ((SLEEP1, SLEEP2, ADDR1), 2, "SUCCESS"), + ((ADDR1, SLEEP1, SLEEP2), 0, "SUCCESS"), ], ) @httpretty.activate @@ -376,7 +378,7 @@ def test_order(self, addresses, expected_address_index, response): Subsequent tests will continue execution after the first response is received. """ - for address in addresses: + for address in set(addresses): httpretty.register_uri(httpretty.GET, address, body=self.response) # Use async_delay=0.0 to avoid adding unnecessary time to tests @@ -395,8 +397,8 @@ def test_order(self, addresses, expected_address_index, response): @httpretty.activate def test_timeout(self): - addresses = [SLEEP, SLEEP, SLEEP] - for address in addresses: + addresses = [SLEEP1, SLEEP2] + for address in set(addresses): httpretty.register_uri(httpretty.GET, address, body=self.response) # Use async_delay=0.0 to avoid adding unnecessary time to tests @@ -406,7 +408,7 @@ def test_timeout(self): max_wait=0, timeout=0, connect_synchronously=False, - async_delay=0.0, + async_delay=0, ) assert not url assert not response_contents From c49dd614e901a197e3c065cf01072f18041c4ab3 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 19 Jan 2022 21:41:34 -0700 Subject: [PATCH 24/54] update comments --- tests/unittests/test_url_helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index b51c33ad5d1..61445c69ae3 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -397,12 +397,13 @@ def test_order(self, addresses, expected_address_index, response): @httpretty.activate def test_timeout(self): + """If no endpoint responds in time, expect no response""" + addresses = [SLEEP1, SLEEP2] for address in set(addresses): httpretty.register_uri(httpretty.GET, address, body=self.response) # Use async_delay=0.0 to avoid adding unnecessary time to tests - # In practice a value such as 0.150 is used url, response_contents = wait_for_url( urls=addresses, max_wait=0, From 9168accd64443c37051b6257d459d895fe10767c Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 24 Jan 2022 15:32:21 -0700 Subject: [PATCH 25/54] Be explicit about variable scope - pass timeout as an argument to nested functions in wait_for_url - use threading.Event for testing rather than sleep() --- cloudinit/url_helper.py | 45 ++++++++++++++++-------------- tests/unittests/test_url_helper.py | 44 ++++++++++++++++++----------- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 03a707796cf..a9532bd8010 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -372,7 +372,7 @@ def dual_stack( func: Callable[..., Any], addresses: List[str], stagger_delay: float = 0.150, - max_wait: int = 10, + timeout: int = 10, ) -> Tuple: """execute multiple callbacks in parallel @@ -384,11 +384,11 @@ def dual_stack( return_result = None returned_address = None - def _run_func(func, addr, delay=None): + def _run_func(func, addr, timeout, delay=None): """Execute func with optional delay""" if delay: time.sleep(delay) - return func(addr) + return func(addr, timeout) executor = ThreadPoolExecutor(max_workers=len(addresses)) try: @@ -397,13 +397,14 @@ def _run_func(func, addr, delay=None): _run_func, func=func, addr=addr, - delay=(None if i == 0 else stagger_delay), + timeout=timeout, + delay=(i * stagger_delay), ): addr for i, addr in enumerate(addresses) } # handle the first function to complete from the threadpool executor - future = next(as_completed(futures, timeout=max_wait)) + future = next(as_completed(futures, timeout=timeout)) returned_address = futures[future] return_result = future.result() @@ -486,14 +487,9 @@ def wait_for_url( A value of None for max_wait will retry indefinitely. """ - start_time = time.time() - def log_status_cb(msg, exc=None): LOG.debug(msg) - if status_cb is None: - status_cb = log_status_cb - def timeup(max_wait, start_time): if max_wait is None: return False @@ -521,7 +517,7 @@ def read_url_handle_response(response, url): url_exc = None return (url_exc, reason) - def read_url_handle_exceptions(url_reader_cb, urls): + def read_url_handle_exceptions(url_reader_cb, urls, start_time): reason = "" url = None try: @@ -550,7 +546,7 @@ def read_url_handle_exceptions(url_reader_cb, urls): # does. exception_cb(msg=status_msg, exception=url_exc) - def read_url_cb(url): + def read_url_cb(url, timeout): return readurl( url, headers={} if headers_cb is None else headers_cb(url), @@ -560,15 +556,13 @@ def read_url_cb(url): request_method=request_method, ) - def read_url_serial(): + def read_url_serial(start_time, timeout): """iterate over list of urls, request each one and handle responses and thrown exceptions individually per url """ def url_reader_serial(url): - return (url, read_url_cb(url)) - - nonlocal timeout + return (url, read_url_cb(url, timeout)) for url in urls: now = time.time() @@ -583,11 +577,12 @@ def url_reader_serial(url): # shorten timeout to not run way over max_time timeout = int((start_time + max_wait) - now) - out = read_url_handle_exceptions(url_reader_serial, url) + out = read_url_handle_exceptions( + url_reader_serial, url, start_time) if out: return out - def read_url_parallel(): + def read_url_parallel(start_time, timeout): """pass list of urls to dual_stack which sends requests in parallel handle response and exceptions of the first endpoint to respond """ @@ -595,12 +590,17 @@ def read_url_parallel(): dual_stack, read_url_cb, stagger_delay=async_delay, - max_wait=max_wait, + timeout=timeout, ) - out = read_url_handle_exceptions(url_reader_parallel, urls) + out = read_url_handle_exceptions(url_reader_parallel, urls, start_time) if out: return out + start_time = time.time() + + if status_cb is None: + status_cb = log_status_cb + do_read_url = ( read_url_serial if connect_synchronously else read_url_parallel ) @@ -613,7 +613,7 @@ def read_url_parallel(): else: sleep_time = int(loop_n / 5) + 1 - url = do_read_url() + url = do_read_url(start_time, timeout) if url: return url @@ -626,6 +626,9 @@ def read_url_parallel(): ) time.sleep(sleep_time) + # shorten timeout to not run way over max_time + timeout = int((start_time + max_wait) - time.time()) + return False, None diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 61445c69ae3..5c1c8b1e547 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -2,11 +2,13 @@ import logging from functools import partial -from time import process_time, sleep +from time import process_time import httpretty import pytest import requests +from threading import Event + from cloudinit import util, version from cloudinit.url_helper import ( @@ -275,27 +277,30 @@ def assert_time(func, max_time=1): return out +event = Event() + + class TestDualStack: - """Async testing suggestions welcome - these all rely on - sleep and time-bounded assertions to prove ordering + """Async testing suggestions welcome - these all rely on time-bounded + assertions (via threading.Event) to prove ordering """ @pytest.mark.parametrize( "func," "addresses," "stagger_delay," - "max_wait," + "timeout," "expected_val," "expected_exc", [ # Assert order based on timeout - (lambda x: x, ("one", "two"), 1, 1, "one", None), + (lambda x, _: x, ("one", "two"), 1, 1, "one", None), # Assert timeout results in (None, None) - (lambda _: sleep(1), ("one", "two"), 1, 0, None, None), + (lambda _a, _b: event.wait(1), ("one", "two"), 1, 0, None, None), # Assert that exception in func is raised - (lambda _: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), + (lambda _a, _b: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), ( - lambda x: sleep(1) if x != "two" else x, + lambda x, _: event.wait(1) if x != "two" else x, ("one", "two"), 0, 1, @@ -303,7 +308,7 @@ class TestDualStack: None, ), ( - lambda x: sleep(1) if x != "tri" else x, + lambda x, _: event.wait(1) if x != "tri" else x, ("one", "two", "tri"), 0, 1, @@ -318,7 +323,7 @@ def test_dual_stack( func, addresses, stagger_delay, - max_wait, + timeout, expected_val, expected_exc, ): @@ -329,7 +334,7 @@ def test_dual_stack( func, addresses, stagger_delay=stagger_delay, - max_wait=max_wait, + timeout=timeout, ) if expected_exc: with pytest.raises(expected_exc): @@ -338,6 +343,8 @@ def test_dual_stack( else: _, result = assert_time(gen) assert expected_val == result + event.set() + event.clear() ADDR1 = "https://addr1/" @@ -348,11 +355,12 @@ def test_dual_stack( class TestUrlHelper: success = "SUCCESS" fail = "FAIL" + event = Event() @classmethod def response(cls, _, uri, response_headers): if uri in (SLEEP1, SLEEP2): - sleep(1) + cls.event.wait(1) return [500, response_headers, cls.fail] return [200, response_headers, cls.success] @@ -369,12 +377,12 @@ def response(cls, _, uri, response_headers): @httpretty.activate def test_order(self, addresses, expected_address_index, response): """Check that the first response gets returned. Simulate a - non-responding endpoint with a response that has a one second sleep. + non-responding endpoint with a response that has a one second wait. - If this test proves flaky, increase sleep time. Since it is async, - increasing sleep time for the non-responding endpoint should not + If this test proves flaky, increase wait time. Since it is async, + increasing wait time for the non-responding endpoint should not increase total test time, assuming async_delay=0 is used and at least - one non-sleep endpoint is registered with httpretty. + one non-waiting endpoint is registered with httpretty. Subsequent tests will continue execution after the first response is received. """ @@ -390,6 +398,8 @@ def test_order(self, addresses, expected_address_index, response): connect_synchronously=False, async_delay=0.0, ) + self.event.set() + self.event.clear() # Test for timeout (no responding endpoint) assert addresses[expected_address_index] == url @@ -411,6 +421,8 @@ def test_timeout(self): connect_synchronously=False, async_delay=0, ) + self.event.set() + self.event.clear() assert not url assert not response_contents From 7381a9311efdd00d0b98ee14176c21976ad92314 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 24 Jan 2022 15:36:21 -0700 Subject: [PATCH 26/54] fmt --- cloudinit/url_helper.py | 4 +++- tests/unittests/test_url_helper.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index a9532bd8010..c59f1c18929 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -487,6 +487,7 @@ def wait_for_url( A value of None for max_wait will retry indefinitely. """ + def log_status_cb(msg, exc=None): LOG.debug(msg) @@ -578,7 +579,8 @@ def url_reader_serial(url): timeout = int((start_time + max_wait) - now) out = read_url_handle_exceptions( - url_reader_serial, url, start_time) + url_reader_serial, url, start_time + ) if out: return out diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 5c1c8b1e547..0a009598f67 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -2,13 +2,12 @@ import logging from functools import partial +from threading import Event from time import process_time import httpretty import pytest import requests -from threading import Event - from cloudinit import util, version from cloudinit.url_helper import ( @@ -298,7 +297,14 @@ class TestDualStack: # Assert timeout results in (None, None) (lambda _a, _b: event.wait(1), ("one", "two"), 1, 0, None, None), # Assert that exception in func is raised - (lambda _a, _b: 1 / 0, ("one", "two"), 1, 1, None, ZeroDivisionError), + ( + lambda _a, _b: 1 / 0, + ("one", "two"), + 1, + 1, + None, + ZeroDivisionError, + ), ( lambda x, _: event.wait(1) if x != "two" else x, ("one", "two"), From e0b98200483325ceac32ff18cae92660575ce013 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 25 Jan 2022 19:54:14 -0700 Subject: [PATCH 27/54] Update cloudinit/url_helper.py Co-authored-by: Chad Smith --- cloudinit/url_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index c59f1c18929..05b748552ad 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -427,11 +427,11 @@ def _run_func(func, addr, timeout, delay=None): # in the constructor, otherwise the context manager would be preferred finally: # python 3.9 allows canceling futures, which may save some cycles - if sys.version_info.major >= 3 and sys.version_info.minor > 9: + try: executor.shutdown( # pylint: disable=E1123 wait=False, cancel_futures=True ) - else: + except TypeError: # no cancel_futures support executor.shutdown(wait=False) return (returned_address, return_result) From 913d3648ff29e5421e888381637072d8dcc3e4d1 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 25 Jan 2022 22:05:32 -0700 Subject: [PATCH 28/54] ip6 first Co-authored-by: Chad Smith --- cloudinit/sources/DataSourceEc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index e7fe05fbd22..9fb346b9cf2 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -56,8 +56,8 @@ class DataSourceEc2(sources.DataSource): # They will be checked for 'resolveability' and some of the # following may be discarded if they do not resolve metadata_urls = [ - "http://[fd00:ec2::254]", "http://169.254.169.254", + "http://[fd00:ec2::254]", "http://instance-data.:8773", ] From 8934a862b05b29b746cfede32b7f29a2e43da57b Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 26 Jan 2022 13:34:40 -0700 Subject: [PATCH 29/54] add test stagger assertions --- tests/unittests/test_url_helper.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 0a009598f67..f616f60108f 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -4,6 +4,7 @@ from functools import partial from threading import Event from time import process_time +from unittest.mock import call import httpretty import pytest @@ -352,6 +353,26 @@ def test_dual_stack( event.set() event.clear() + def test_dual_stack_staggered(self): + """Assert expected sleeps occur""" + event = Event() + with mock.patch("time.sleep", side_effect=event.wait) as sleep_m: + dual_stack( + lambda x, _: x, + ["you", "and", "me", "and", "dog"], + stagger_delay=1, + timeout=1, + ) + sleep_m.assert_has_calls( + [ + call(1), + call(2), + call(3), + call(4), + ] + ) + event.set() + ADDR1 = "https://addr1/" SLEEP1 = "https://sleep1/" From f1e28cf46656da81a875e104ef89a99d236ca416 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 26 Jan 2022 13:59:54 -0700 Subject: [PATCH 30/54] update mocked endpoint --- cloudinit/url_helper.py | 1 - tests/unittests/sources/test_ec2.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 05b748552ad..96c6c9b32ab 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -11,7 +11,6 @@ import copy import json import os -import sys import time from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed from email.utils import parsedate diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 9bf6ba84def..96e1f03b473 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -524,7 +524,7 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): self.assertTrue(ds.get_data()) # Provide new revision of metadata that contains network data register_mock_metaserver( - "http://[fd00:ec2::254]/2009-04-04/meta-data/", DEFAULT_METADATA + "http://169.254.169.254/2009-04-04/meta-data/", DEFAULT_METADATA ) mac1 = "06:17:04:d7:26:09" # Defined in DEFAULT_METADATA get_interface_mac_path = M_PATH_NET + "get_interfaces_by_mac" From 769d68094a5906cee67b796b359cb0e98b04ff71 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 31 Jan 2022 08:48:48 -0700 Subject: [PATCH 31/54] only connect asyncrhonously in _maybe_fetch_api_token --- cloudinit/sources/DataSourceEc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 9fb346b9cf2..961c5090dc3 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -257,6 +257,7 @@ def _maybe_fetch_api_token(self, mdurls, timeout=None, max_wait=None): exception_cb=self._imds_exception_cb, request_method=request_method, headers_redact=AWS_TOKEN_REDACT, + connect_synchronously=False, ) except uhelp.UrlError: # We use the raised exception to interupt the retry loop. @@ -322,7 +323,6 @@ def wait_for_metadata_service(self): headers_redact=AWS_TOKEN_REDACT, headers_cb=self._get_headers, request_method=request_method, - connect_synchronously=False, ) if url: From b6901b9c3fef303bada63b57bac90080ae72a75d Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 31 Jan 2022 09:28:49 -0700 Subject: [PATCH 32/54] parallel PUT means multiple token refreshes, update test accordingly --- tests/unittests/sources/test_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 96e1f03b473..03521b0fa0a 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -681,7 +681,7 @@ def test_aws_token_redacted(self): logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] logs_with_token = [log for log in all_logs if "API-TOKEN" in log] - self.assertEqual(1, len(logs_with_redacted_ttl)) + self.assertEqual(5, len(logs_with_redacted_ttl)) self.assertEqual(81, len(logs_with_redacted)) self.assertEqual(0, len(logs_with_token)) From 2aa20c6ef2832c932fbbd69a705beb6989f4d977 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 31 Jan 2022 13:14:12 -0700 Subject: [PATCH 33/54] parallel http request cause non-determinism here, so our tests need to be a bit looser --- tests/unittests/sources/test_ec2.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 03521b0fa0a..14380b20a58 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -2,6 +2,7 @@ import copy import json +import threading from unittest import mock import httpretty @@ -339,6 +340,15 @@ def _setup_ds(self, sys_cfg, platform_data, md, md_version=None): if sys_cfg is None: sys_cfg = {} ds = self.datasource(sys_cfg=sys_cfg, distro=distro, paths=paths) + event = threading.Event() + p = mock.patch("time.sleep", event.wait) + p.start() + + def _mock_sleep(): + event.set() + p.stop() + + self.addCleanup(_mock_sleep) if not md_version: md_version = ds.min_metadata_version if platform_data is not None: @@ -681,8 +691,8 @@ def test_aws_token_redacted(self): logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] logs_with_token = [log for log in all_logs if "API-TOKEN" in log] - self.assertEqual(5, len(logs_with_redacted_ttl)) - self.assertEqual(81, len(logs_with_redacted)) + assert len(logs_with_redacted_ttl) + assert 80 < len(logs_with_redacted) self.assertEqual(0, len(logs_with_token)) @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") From bf201b4c296bb02e5f52a9a6985faca3d6e8bb0c Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 2 Feb 2022 09:16:13 -0700 Subject: [PATCH 34/54] set defaults in function signature keyword args --- cloudinit/url_helper.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 96c6c9b32ab..24fa6889ada 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -439,12 +439,12 @@ def wait_for_url( urls, max_wait=None, timeout=None, - status_cb=None, + status_cb=LOG.debug, headers_cb=None, headers_redact=None, sleep_time=1, exception_cb=None, - sleep_time_cb=None, + sleep_time_cb=lambda _, loop_number: int(loop_number / 5) + 1, request_method=None, connect_synchronously=True, async_delay=0.150, @@ -487,9 +487,6 @@ def wait_for_url( A value of None for max_wait will retry indefinitely. """ - def log_status_cb(msg, exc=None): - LOG.debug(msg) - def timeup(max_wait, start_time): if max_wait is None: return False @@ -599,9 +596,6 @@ def read_url_parallel(start_time, timeout): start_time = time.time() - if status_cb is None: - status_cb = log_status_cb - do_read_url = ( read_url_serial if connect_synchronously else read_url_parallel ) @@ -609,10 +603,7 @@ def read_url_parallel(start_time, timeout): loop_n = 0 response = None while True: - if sleep_time_cb is not None: - sleep_time = sleep_time_cb(response, loop_n) - else: - sleep_time = int(loop_n / 5) + 1 + sleep_time = sleep_time_cb(response, loop_n) url = do_read_url(start_time, timeout) if url: @@ -628,7 +619,8 @@ def read_url_parallel(start_time, timeout): time.sleep(sleep_time) # shorten timeout to not run way over max_time - timeout = int((start_time + max_wait) - time.time()) + # timeout=0.0 causes exceptions in urllib, set to None if zero + timeout = int((start_time + max_wait) - time.time()) or None return False, None From 1fac4462ca9ae8b61e83f836d3861ae22e66e308 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 7 Mar 2022 15:14:35 -0700 Subject: [PATCH 35/54] httpretty has threading issues, use responses - wait for other thread to exit to ensure resources clean up - get exceptions before results --- cloudinit/url_helper.py | 8 +++--- test-requirements.txt | 1 + tests/unittests/test_url_helper.py | 41 +++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 24fa6889ada..de1dee2bcd6 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,7 +12,7 @@ import json import os import time -from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed +from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed, wait, ALL_COMPLETED from email.utils import parsedate from errno import ENOENT from functools import partial @@ -404,9 +404,9 @@ def _run_func(func, addr, timeout, delay=None): # handle the first function to complete from the threadpool executor future = next(as_completed(futures, timeout=timeout)) + wait(futures, return_when=ALL_COMPLETED) returned_address = futures[future] - return_result = future.result() return_exception = future.exception() if return_exception: LOG.warning( @@ -415,7 +415,8 @@ def _run_func(func, addr, timeout, delay=None): returned_address, ) raise return_exception - elif not return_result: + return_result = future.result() + if not return_result: LOG.warning("Empty result for address %s", returned_address) # when max_wait expires @@ -432,6 +433,7 @@ def _run_func(func, addr, timeout, delay=None): ) except TypeError: # no cancel_futures support executor.shutdown(wait=False) + return (returned_address, return_result) diff --git a/test-requirements.txt b/test-requirements.txt index 06dfbbec156..44a92430d92 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -6,3 +6,4 @@ pytest-cov # Only really needed on older versions of python setuptools jsonschema +responses diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index f616f60108f..6653bba7b74 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -3,12 +3,13 @@ import logging from functools import partial from threading import Event -from time import process_time +from time import process_time, sleep from unittest.mock import call import httpretty import pytest import requests +import responses from cloudinit import util, version from cloudinit.url_helper import ( @@ -385,11 +386,13 @@ class TestUrlHelper: event = Event() @classmethod - def response(cls, _, uri, response_headers): - if uri in (SLEEP1, SLEEP2): - cls.event.wait(1) - return [500, response_headers, cls.fail] - return [200, response_headers, cls.success] + def response_wait(cls, _request): + cls.event.wait(0.1) + return (500, {'request-id': '1'}, cls.fail) + + @classmethod + def response_nowait(cls, _request): + return (200, {'request-id': '0'}, cls.success) @pytest.mark.parametrize( "addresses," "expected_address_index," "response,", @@ -401,7 +404,7 @@ def response(cls, _, uri, response_headers): ((ADDR1, SLEEP1, SLEEP2), 0, "SUCCESS"), ], ) - @httpretty.activate + @responses.activate def test_order(self, addresses, expected_address_index, response): """Check that the first response gets returned. Simulate a non-responding endpoint with a response that has a one second wait. @@ -414,7 +417,14 @@ def test_order(self, addresses, expected_address_index, response): received. """ for address in set(addresses): - httpretty.register_uri(httpretty.GET, address, body=self.response) + responses.add_callback( + responses.GET, + address, + callback=( + self.response_wait if "sleep" in address else + self.response_nowait), + content_type='application/json' + ) # Use async_delay=0.0 to avoid adding unnecessary time to tests # In practice a value such as 0.150 is used @@ -432,19 +442,26 @@ def test_order(self, addresses, expected_address_index, response): assert addresses[expected_address_index] == url assert response.encode() == response_contents - @httpretty.activate + @responses.activate def test_timeout(self): """If no endpoint responds in time, expect no response""" addresses = [SLEEP1, SLEEP2] for address in set(addresses): - httpretty.register_uri(httpretty.GET, address, body=self.response) + responses.add_callback( + responses.GET, + address, + callback=( + self.response_wait if "sleep" in address else + self.response_nowait), + content_type='application/json' + ) # Use async_delay=0.0 to avoid adding unnecessary time to tests url, response_contents = wait_for_url( urls=addresses, - max_wait=0, - timeout=0, + max_wait=1, + timeout=1, connect_synchronously=False, async_delay=0, ) From e3290a1056132a2a981fc6f76ff3ef940ace8d1b Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Thu, 17 Mar 2022 15:06:34 -0600 Subject: [PATCH 36/54] Activate responses --- tests/unittests/sources/test_ec2.py | 34 ++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 14380b20a58..82149df6cf7 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -5,7 +5,7 @@ import threading from unittest import mock -import httpretty +import responses import requests from cloudinit import helpers @@ -301,9 +301,15 @@ def register_helper(register, base_url, body): register(base_url, "not found", status=404) def myreg(*argc, **kwargs): - url = argc[0] - method = httpretty.PUT if ec2.API_TOKEN_ROUTE in url else httpretty.GET - return httpretty.register_uri(method, *argc, **kwargs) + url, body = argc + method = responses.PUT if ec2.API_TOKEN_ROUTE in url else responses.GET + print(f"registering: len= {len(argc)}\n{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}") + status = kwargs.get('status', 200) + return responses.add( + method, + url, + body, + status=status) register_helper(myreg, base_url, data) @@ -392,6 +398,7 @@ def _mock_sleep(): register_mock_metaserver(instance_id_url, None) return ds + @responses.activate def test_network_config_property_returns_version_2_network_data(self): """network_config property returns network version 2 for metadata""" ds = self._setup_ds( @@ -426,6 +433,7 @@ def test_network_config_property_returns_version_2_network_data(self): m_get_mac.return_value = mac1 self.assertEqual(expected, ds.network_config) + @responses.activate def test_network_config_property_set_dhcp4(self): """network_config property configures dhcp4 on nics with local-ipv4s. @@ -464,6 +472,7 @@ def test_network_config_property_set_dhcp4(self): m_get_mac.return_value = mac1 self.assertEqual(expected, ds.network_config) + @responses.activate def test_network_config_property_secondary_private_ips(self): """network_config property configures any secondary ipv4 addresses. @@ -507,6 +516,7 @@ def test_network_config_property_secondary_private_ips(self): m_get_mac.return_value = mac1 self.assertEqual(expected, ds.network_config) + @responses.activate def test_network_config_property_is_cached_in_datasource(self): """network_config property is cached in DataSourceEc2.""" ds = self._setup_ds( @@ -518,6 +528,7 @@ def test_network_config_property_is_cached_in_datasource(self): self.assertEqual({"cached": "data"}, ds.network_config) @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") + @responses.activate def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): """Refresh the network_config Ec2 cache if network key is absent. @@ -560,6 +571,7 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): } self.assertEqual(expected, ds.network_config) + @responses.activate def test_ec2_get_instance_id_refreshes_identity_on_upgrade(self): """get_instance-id gets DataSourceEc2Local.identity if not present. @@ -598,6 +610,7 @@ def test_ec2_get_instance_id_refreshes_identity_on_upgrade(self): ds.metadata = DEFAULT_METADATA self.assertEqual("my-identity-id", ds.get_instance_id()) + @responses.activate def test_classic_instance_true(self): """If no vpc-id in metadata, is_classic_instance must return true.""" md_copy = copy.deepcopy(DEFAULT_METADATA) @@ -614,6 +627,7 @@ def test_classic_instance_true(self): self.assertTrue(ds.get_data()) self.assertTrue(ds.is_classic_instance()) + @responses.activate def test_classic_instance_false(self): """If vpc-id in metadata, is_classic_instance must return false.""" ds = self._setup_ds( @@ -624,6 +638,7 @@ def test_classic_instance_false(self): self.assertTrue(ds.get_data()) self.assertFalse(ds.is_classic_instance()) + @responses.activate def test_aws_inaccessible_imds_service_fails_with_retries(self): """Inaccessibility of http://169.254.169.254 are retried.""" ds = self._setup_ds( @@ -649,6 +664,7 @@ def test_aws_inaccessible_imds_service_fails_with_retries(self): for readurl_call in m_readurl.call_args_list: self.assertIn("latest/api/token", readurl_call[0][0]) + @responses.activate def test_aws_token_403_fails_without_retries(self): """Verify that 403s fetching AWS tokens are not retried.""" ds = self._setup_ds( @@ -657,7 +673,7 @@ def test_aws_token_403_fails_without_retries(self): md=None, ) token_url = self.data_url("latest", data_item="api/token") - httpretty.register_uri(httpretty.PUT, token_url, body={}, status=403) + responses.add(responses.PUT, token_url, status=403) self.assertFalse(ds.get_data()) # Just one /latest/api/token request logs = self.logs.getvalue() @@ -677,6 +693,7 @@ def test_aws_token_403_fails_without_retries(self): ), ) + @responses.activate def test_aws_token_redacted(self): """Verify that aws tokens are redacted when logged.""" ds = self._setup_ds( @@ -695,6 +712,7 @@ def test_aws_token_redacted(self): assert 80 < len(logs_with_redacted) self.assertEqual(0, len(logs_with_token)) + @responses.activate @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") def test_valid_platform_with_strict_true(self, m_dhcp): """Valid platform data should return true with strict_id true.""" @@ -710,6 +728,7 @@ def test_valid_platform_with_strict_true(self, m_dhcp): self.assertEqual("ec2", ds.platform_type) self.assertEqual("metadata (%s)" % ds.metadata_address, ds.subplatform) + @responses.activate def test_valid_platform_with_strict_false(self): """Valid platform data should return true with strict_id false.""" ds = self._setup_ds( @@ -720,6 +739,7 @@ def test_valid_platform_with_strict_false(self): ret = ds.get_data() self.assertTrue(ret) + @responses.activate def test_unknown_platform_with_strict_true(self): """Unknown platform data with strict_id true should return False.""" uuid = "ab439480-72bf-11d3-91fc-b8aded755F9a" @@ -731,6 +751,7 @@ def test_unknown_platform_with_strict_true(self): ret = ds.get_data() self.assertFalse(ret) + @responses.activate def test_unknown_platform_with_strict_false(self): """Unknown platform data with strict_id false should return True.""" uuid = "ab439480-72bf-11d3-91fc-b8aded755F9a" @@ -742,6 +763,7 @@ def test_unknown_platform_with_strict_false(self): ret = ds.get_data() self.assertTrue(ret) + @responses.activate def test_ec2_local_returns_false_on_non_aws(self): """DataSourceEc2Local returns False when platform is not AWS.""" self.datasource = ec2.DataSourceEc2Local @@ -769,6 +791,7 @@ def test_ec2_local_returns_false_on_non_aws(self): self.assertIn(message, self.logs.getvalue()) @mock.patch("cloudinit.sources.DataSourceEc2.util.is_FreeBSD") + @responses.activate def test_ec2_local_returns_false_on_bsd(self, m_is_freebsd): """DataSourceEc2Local returns False on BSD. @@ -792,6 +815,7 @@ def test_ec2_local_returns_false_on_bsd(self, m_is_freebsd): @mock.patch("cloudinit.net.find_fallback_nic") @mock.patch("cloudinit.net.dhcp.maybe_perform_dhcp_discovery") @mock.patch("cloudinit.sources.DataSourceEc2.util.is_FreeBSD") + @responses.activate def test_ec2_local_performs_dhcp_on_non_bsd( self, m_is_bsd, m_dhcp, m_fallback_nic, m_net ): From ece5a01f1950eca43363fcba57f63cee2ca44e04 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Tue, 22 Mar 2022 13:43:04 -0600 Subject: [PATCH 37/54] check if responses might work across versions --- cloudinit/url_helper.py | 8 +++++++- tests/unittests/sources/test_ec2.py | 18 ++++++++---------- tests/unittests/test_url_helper.py | 20 ++++++++++++-------- tox.ini | 1 + 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index de1dee2bcd6..c8f338c30d1 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,7 +12,13 @@ import json import os import time -from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed, wait, ALL_COMPLETED +from concurrent.futures import ( + ALL_COMPLETED, + ThreadPoolExecutor, + TimeoutError, + as_completed, + wait, +) from email.utils import parsedate from errno import ENOENT from functools import partial diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 82149df6cf7..f51be75dedf 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -5,8 +5,8 @@ import threading from unittest import mock -import responses import requests +import responses from cloudinit import helpers from cloudinit.sources import DataSourceEc2 as ec2 @@ -303,13 +303,11 @@ def register_helper(register, base_url, body): def myreg(*argc, **kwargs): url, body = argc method = responses.PUT if ec2.API_TOKEN_ROUTE in url else responses.GET - print(f"registering: len= {len(argc)}\n{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}") - status = kwargs.get('status', 200) - return responses.add( - method, - url, - body, - status=status) + print( + f"registering: len= {len(argc)}\n{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}" + ) + status = kwargs.get("status", 200) + return responses.add(method, url, body, status=status) register_helper(myreg, base_url, data) @@ -639,7 +637,7 @@ def test_classic_instance_false(self): self.assertFalse(ds.is_classic_instance()) @responses.activate - def test_aws_inaccessible_imds_service_fails_with_retries(self): + def _test_aws_inaccessible_imds_service_fails_with_retries(self): """Inaccessibility of http://169.254.169.254 are retried.""" ds = self._setup_ds( platform_data=self.valid_platform_data, @@ -665,7 +663,7 @@ def test_aws_inaccessible_imds_service_fails_with_retries(self): self.assertIn("latest/api/token", readurl_call[0][0]) @responses.activate - def test_aws_token_403_fails_without_retries(self): + def _test_aws_token_403_fails_without_retries(self): """Verify that 403s fetching AWS tokens are not retried.""" ds = self._setup_ds( platform_data=self.valid_platform_data, diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 6653bba7b74..145fc25879a 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -388,11 +388,11 @@ class TestUrlHelper: @classmethod def response_wait(cls, _request): cls.event.wait(0.1) - return (500, {'request-id': '1'}, cls.fail) + return (500, {"request-id": "1"}, cls.fail) @classmethod def response_nowait(cls, _request): - return (200, {'request-id': '0'}, cls.success) + return (200, {"request-id": "0"}, cls.success) @pytest.mark.parametrize( "addresses," "expected_address_index," "response,", @@ -421,9 +421,11 @@ def test_order(self, addresses, expected_address_index, response): responses.GET, address, callback=( - self.response_wait if "sleep" in address else - self.response_nowait), - content_type='application/json' + self.response_wait + if "sleep" in address + else self.response_nowait + ), + content_type="application/json", ) # Use async_delay=0.0 to avoid adding unnecessary time to tests @@ -452,9 +454,11 @@ def test_timeout(self): responses.GET, address, callback=( - self.response_wait if "sleep" in address else - self.response_nowait), - content_type='application/json' + self.response_wait + if "sleep" in address + else self.response_nowait + ), + content_type="application/json", ) # Use async_delay=0.0 to avoid adding unnecessary time to tests diff --git a/tox.ini b/tox.ini index b9fe5622142..c3c4a016f5b 100644 --- a/tox.ini +++ b/tox.ini @@ -113,6 +113,7 @@ deps = pytest-cov==2.5.1 # Needed by pytest and default causes failures attrs==17.4.0 + responses==0.5.1 [testenv:lowest-supported] # This definition will run on bionic with the version of httpretty From 0e80fa8e53970d68066945c0b0ebf3b9a587e58f Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 08:44:23 -0600 Subject: [PATCH 38/54] work around bug in responses: https://github.com/getsentry/responses/issues/212 --- tests/unittests/sources/test_ec2.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index f51be75dedf..79d411699fe 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -541,6 +541,17 @@ def test_network_config_cached_property_refreshed_on_upgrade(self, m_dhcp): md={"md": old_metadata}, ) self.assertTrue(ds.get_data()) + + # Workaround https://github.com/getsentry/responses/issues/212 + # Can be removed when requests < 0.17.0 is no longer tested + # i.e. after Focal is EOL + if hasattr(responses.mock, "_urls"): + for index, url in enumerate(responses.mock._urls): + if url["url"].startswith( + "http://169.254.169.254/2009-04-04/meta-data/" + ): + del responses.mock._urls[index] + # Provide new revision of metadata that contains network data register_mock_metaserver( "http://169.254.169.254/2009-04-04/meta-data/", DEFAULT_METADATA From d1f922248505bef8eb7e7d82c19c82460fc649c2 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 09:45:02 -0600 Subject: [PATCH 39/54] use responses with new test (rebased) --- tests/unittests/sources/test_ec2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 79d411699fe..0baeab25002 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -866,6 +866,7 @@ def test_ec2_local_performs_dhcp_on_non_bsd( ) self.assertIn("Crawl of metadata service took", self.logs.getvalue()) + @responses.activate def test_get_instance_tags(self): ds = self._setup_ds( platform_data=self.valid_platform_data, From 40fd9dcebd18d98c452a69a26493e752a8e548a1 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 10:02:20 -0600 Subject: [PATCH 40/54] fixup! use responses with new test (rebased) --- tests/unittests/sources/test_ec2.py | 3 ++- tests/unittests/test_url_helper.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 0baeab25002..458b059d951 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -304,7 +304,8 @@ def myreg(*argc, **kwargs): url, body = argc method = responses.PUT if ec2.API_TOKEN_ROUTE in url else responses.GET print( - f"registering: len= {len(argc)}\n{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}" + f"registering: len= {len(argc)}\n" + f"{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}" ) status = kwargs.get("status", 200) return responses.add(method, url, body, status=status) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 145fc25879a..ae7050f86af 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -3,7 +3,7 @@ import logging from functools import partial from threading import Event -from time import process_time, sleep +from time import process_time from unittest.mock import call import httpretty From 0579ef09d12de97c5322a10099d3de08fa1377c8 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 11:55:22 -0600 Subject: [PATCH 41/54] pass callback as argument --- cloudinit/url_helper.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index c8f338c30d1..38d71bd49c9 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -522,7 +522,7 @@ def read_url_handle_response(response, url): url_exc = None return (url_exc, reason) - def read_url_handle_exceptions(url_reader_cb, urls, start_time): + def read_url_handle_exceptions(url_reader_cb, urls, start_time, exc_cb): reason = "" url = None try: @@ -545,11 +545,11 @@ def read_url_handle_exceptions(url_reader_cb, urls, start_time): reason, ) status_cb(status_msg) - if exception_cb: + if exc_cb: # This can be used to alter the headers that will be sent # in the future, for example this is what the MAAS datasource # does. - exception_cb(msg=status_msg, exception=url_exc) + exc_cb(msg=status_msg, exception=url_exc) def read_url_cb(url, timeout): return readurl( @@ -561,7 +561,7 @@ def read_url_cb(url, timeout): request_method=request_method, ) - def read_url_serial(start_time, timeout): + def read_url_serial(start_time, timeout, exc_cb): """iterate over list of urls, request each one and handle responses and thrown exceptions individually per url """ @@ -583,12 +583,12 @@ def url_reader_serial(url): timeout = int((start_time + max_wait) - now) out = read_url_handle_exceptions( - url_reader_serial, url, start_time + url_reader_serial, url, start_time, exc_cb ) if out: return out - def read_url_parallel(start_time, timeout): + def read_url_parallel(start_time, timeout, exc_cb): """pass list of urls to dual_stack which sends requests in parallel handle response and exceptions of the first endpoint to respond """ @@ -598,7 +598,9 @@ def read_url_parallel(start_time, timeout): stagger_delay=async_delay, timeout=timeout, ) - out = read_url_handle_exceptions(url_reader_parallel, urls, start_time) + out = read_url_handle_exceptions( + url_reader_parallel, urls, start_time, exc_cb + ) if out: return out @@ -613,7 +615,7 @@ def read_url_parallel(start_time, timeout): while True: sleep_time = sleep_time_cb(response, loop_n) - url = do_read_url(start_time, timeout) + url = do_read_url(start_time, timeout, exception_cb) if url: return url From 17f5777c9a0b43cf13f94799405269633bcb1b25 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 11:57:38 -0600 Subject: [PATCH 42/54] Re-enable previously failing tests. Previously the 403 test verified that the following output was logged: "PUT /latest/api/token HTTP/1.1" 403 0 This log output is from urllib3.connectionpool, and is redundant with the other checked logs, one of which is only logged when code==403. We don't get this log currently when run under dual-stack, so drop it from the test. --- tests/unittests/sources/test_ec2.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 458b059d951..9616fce2b4d 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -649,7 +649,7 @@ def test_classic_instance_false(self): self.assertFalse(ds.is_classic_instance()) @responses.activate - def _test_aws_inaccessible_imds_service_fails_with_retries(self): + def test_aws_inaccessible_imds_service_fails_with_retries(self): """Inaccessibility of http://169.254.169.254 are retried.""" ds = self._setup_ds( platform_data=self.valid_platform_data, @@ -675,33 +675,26 @@ def _test_aws_inaccessible_imds_service_fails_with_retries(self): self.assertIn("latest/api/token", readurl_call[0][0]) @responses.activate - def _test_aws_token_403_fails_without_retries(self): + def test_aws_token_403_fails_without_retries(self): """Verify that 403s fetching AWS tokens are not retried.""" ds = self._setup_ds( platform_data=self.valid_platform_data, sys_cfg={"datasource": {"Ec2": {"strict_id": False}}}, md=None, ) + token_url = self.data_url("latest", data_item="api/token") responses.add(responses.PUT, token_url, status=403) self.assertFalse(ds.get_data()) # Just one /latest/api/token request logs = self.logs.getvalue() - failed_put_log = '"PUT /latest/api/token HTTP/1.1" 403 0' expected_logs = [ "WARNING: Ec2 IMDS endpoint returned a 403 error. HTTP endpoint is" " disabled. Aborting.", "WARNING: IMDS's HTTP endpoint is probably disabled", - failed_put_log, ] for log in expected_logs: self.assertIn(log, logs) - self.assertEqual( - 1, - len( - [line for line in logs.splitlines() if failed_put_log in line] - ), - ) @responses.activate def test_aws_token_redacted(self): From 1042fbde2a61157bc32d1f3e3f2c21018c94fc55 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 6 Apr 2022 12:35:40 -0600 Subject: [PATCH 43/54] make logger callback be passed as an argument, comment functions --- cloudinit/url_helper.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 38d71bd49c9..17e7723a6c2 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -447,7 +447,7 @@ def wait_for_url( urls, max_wait=None, timeout=None, - status_cb=LOG.debug, + status_cb=LOG.debug, # some datasources use different log levels headers_cb=None, headers_redact=None, sleep_time=1, @@ -496,11 +496,13 @@ def wait_for_url( """ def timeup(max_wait, start_time): + """Check if time is up based on start time and max wait""" if max_wait is None: return False return (max_wait <= 0) or (time.time() - start_time > max_wait) def read_url_handle_response(response, url): + """Map requests response code/contents to internal "UrlError" type""" if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError( @@ -522,7 +524,10 @@ def read_url_handle_response(response, url): url_exc = None return (url_exc, reason) - def read_url_handle_exceptions(url_reader_cb, urls, start_time, exc_cb): + def read_url_handle_exceptions( + url_reader_cb, urls, start_time, exc_cb, log_cb + ): + """""" reason = "" url = None try: @@ -533,7 +538,7 @@ def read_url_handle_exceptions(url_reader_cb, urls, start_time, exc_cb): except UrlError as e: reason = "request error [%s]" % e url_exc = e - except Exception as e: + except Exception as e: # yikes, do we really think this is necessary? reason = "unexpected error [%s]" % e url_exc = e time_taken = int(time.time() - start_time) @@ -544,7 +549,7 @@ def read_url_handle_exceptions(url_reader_cb, urls, start_time, exc_cb): max_wait_str, reason, ) - status_cb(status_msg) + log_cb(status_msg) if exc_cb: # This can be used to alter the headers that will be sent # in the future, for example this is what the MAAS datasource @@ -561,7 +566,7 @@ def read_url_cb(url, timeout): request_method=request_method, ) - def read_url_serial(start_time, timeout, exc_cb): + def read_url_serial(start_time, timeout, exc_cb, log_cb): """iterate over list of urls, request each one and handle responses and thrown exceptions individually per url """ @@ -583,12 +588,12 @@ def url_reader_serial(url): timeout = int((start_time + max_wait) - now) out = read_url_handle_exceptions( - url_reader_serial, url, start_time, exc_cb + url_reader_serial, url, start_time, exc_cb, log_cb ) if out: return out - def read_url_parallel(start_time, timeout, exc_cb): + def read_url_parallel(start_time, timeout, exc_cb, log_cb): """pass list of urls to dual_stack which sends requests in parallel handle response and exceptions of the first endpoint to respond """ @@ -599,13 +604,18 @@ def read_url_parallel(start_time, timeout, exc_cb): timeout=timeout, ) out = read_url_handle_exceptions( - url_reader_parallel, urls, start_time, exc_cb + url_reader_parallel, urls, start_time, exc_cb, log_cb ) if out: return out start_time = time.time() + # Dual-stack support factored out serial and parallel execution paths to + # allow the retry loop logic to exist separately from the http calls. + # Serial execution should be fundamentally the same as before, but with a + # layer of indirection so that the parallel dual-stack path may use the + # same max timeout logic. do_read_url = ( read_url_serial if connect_synchronously else read_url_parallel ) @@ -615,7 +625,7 @@ def read_url_parallel(start_time, timeout, exc_cb): while True: sleep_time = sleep_time_cb(response, loop_n) - url = do_read_url(start_time, timeout, exception_cb) + url = do_read_url(start_time, timeout, exception_cb, status_cb) if url: return url From 5d4153956707ae7dd15d7b60e8f6c8f8d63b3585 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 10:43:09 -0600 Subject: [PATCH 44/54] Reworking dual_stack for more robust connection handling: This allows a ConnectionError exception to be thrown in one of the threads without canceling the other thread - the first successful call is returned, the rest are canceled. If all threads have exceptions, log them all and raise the last one (need choose one somehow, right? I chose the last; tell me if there is a reason we should raise the first in order of execution or first in alphabetical order or whatever) Tests to verify the above described behavior have been added. --- cloudinit/url_helper.py | 125 +++++++++++++++++------------ tests/unittests/test_url_helper.py | 74 +++++++++++------ 2 files changed, 124 insertions(+), 75 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 17e7723a6c2..003f71d955d 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -12,6 +12,7 @@ import json import os import time +import threading from concurrent.futures import ( ALL_COMPLETED, ThreadPoolExecutor, @@ -372,6 +373,17 @@ def _cb(url): raise excps[-1] +def _run_func_with_delay(func, addr, timeout, event, delay=None): + """Execute func with optional delay + """ + if delay: + + # event returns True iff the flag is set to true: indicating that + # another thread has already completed successfully, no need to try + # again - exit early + if event.wait(timeout=delay): + return + return func(addr, timeout) def dual_stack( func: Callable[..., Any], @@ -382,63 +394,72 @@ def dual_stack( """execute multiple callbacks in parallel Run blocking func against two different addresses staggered with a - delay. The first call to return is returned from this function and - remaining unfinished calls are canceled (or finish asynchronously on - python <3.9 + delay. The first call to return successfully is returned from this + function and remaining unfinished calls are cancelled if they have not + yet started """ return_result = None returned_address = None + last_exception = None + exceptions = [] + is_done = threading.Event() - def _run_func(func, addr, timeout, delay=None): - """Execute func with optional delay""" - if delay: - time.sleep(delay) - return func(addr, timeout) - - executor = ThreadPoolExecutor(max_workers=len(addresses)) - try: - futures = { - executor.submit( - _run_func, - func=func, - addr=addr, - timeout=timeout, - delay=(i * stagger_delay), - ): addr - for i, addr in enumerate(addresses) - } - # handle the first function to complete from the threadpool executor - future = next(as_completed(futures, timeout=timeout)) - wait(futures, return_when=ALL_COMPLETED) + # future work: add cancel_futures to Python stdlib ThreadPoolExecutor + # context manager implementation + # + # for now we don't use this feature since it only supports python >3.8 + # and doesn't provide a context manager and only marginal benefit + with ThreadPoolExecutor(max_workers=len(addresses)) as executor: + try: + futures = { + executor.submit( + _run_func_with_delay, + func=func, + addr=addr, + timeout=timeout, + event=is_done, + delay=(i * stagger_delay), + ): addr + for i, addr in enumerate(addresses) + } + + # handle the first function to complete from the threadpool executor + for future in as_completed(futures, timeout=timeout): + + returned_address = futures[future] + return_exception = future.exception() + if return_exception: + last_exception = return_exception + else: + return_result = future.result() + if return_result: + + # communicate to other threads that they do not need to + # try: this thread has already succeeded + is_done.set() + return (returned_address, return_result) + + # No success, return the last exception but log them all for + # debugging + if last_exception: + LOG.warning( + "Exception(s) %s during request to %s, raising last exception", + ' '.join(exceptions), + returned_address, + ) + raise last_exception + else: + LOG.error("Empty result for address %s", returned_address) + raise ValueError("No result returned") - returned_address = futures[future] - return_exception = future.exception() - if return_exception: + # when max_wait expires, log but don't throw (retries happen) + except TimeoutError: LOG.warning( - "Exception %s during request to %s", - return_exception, - returned_address, - ) - raise return_exception - return_result = future.result() - if not return_result: - LOG.warning("Empty result for address %s", returned_address) - - # when max_wait expires - except TimeoutError: - LOG.warning("Timed out waiting for addresses: %s", " ".join(addresses)) - - # Executor doesn't provide kwargs for setting shutdown behavior - # in the constructor, otherwise the context manager would be preferred - finally: - # python 3.9 allows canceling futures, which may save some cycles - try: - executor.shutdown( # pylint: disable=E1123 - wait=False, cancel_futures=True - ) - except TypeError: # no cancel_futures support - executor.shutdown(wait=False) + "Timed out waiting for addresses: %s, " + "exception(s) raised while waiting: %s", + " ".join(addresses), + " ".join(exceptions)) return (returned_address, return_result) @@ -527,7 +548,7 @@ def read_url_handle_response(response, url): def read_url_handle_exceptions( url_reader_cb, urls, start_time, exc_cb, log_cb ): - """""" + """Execute request, handle response, optionally log exception""" reason = "" url = None try: @@ -627,6 +648,7 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): url = do_read_url(start_time, timeout, exception_cb, status_cb) if url: + print(url) return url if timeup(max_wait, start_time): @@ -642,6 +664,7 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): # timeout=0.0 causes exceptions in urllib, set to None if zero timeout = int((start_time + max_wait) - time.time()) or None + LOG.warning("No response from url") return False, None diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index ae7050f86af..04fb5b360f6 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -257,10 +257,6 @@ def test_perform_retries_on_timeout(self): self.assertTrue(retry_on_url_exc(msg="", exc=myerror)) -def _raise(a): - raise a - - def assert_time(func, max_time=1): """Assert function time is bounded by a max (default=1s) @@ -296,17 +292,51 @@ class TestDualStack: [ # Assert order based on timeout (lambda x, _: x, ("one", "two"), 1, 1, "one", None), + # Assert timeout results in (None, None) (lambda _a, _b: event.wait(1), ("one", "two"), 1, 0, None, None), - # Assert that exception in func is raised + + # Assert that exception in func is raised if all threads + # raise exception + # currently if all threads experience exception + # dual_stack() logs an error containing all exceptions + # but only raises the last exception to occur ( lambda _a, _b: 1 / 0, ("one", "two"), - 1, + 0, 1, None, ZeroDivisionError, ), + + # Verify "best effort behavior" + # dual_stack will temporarily ignore an exception in any of the + # request threads in hopes that a later thread will succeed + # this behavior is intended to allow a requests.ConnectionError + # exception from on endpoint to occur without preventing another + # thread from succeeding + ( + lambda a, _b: 1 / 0 if a == "one" else a, + ("one", "two"), + 0, + 1, + "two", + None, + ), + + # Assert that exception in func is only raised + # if neither thread gets a valid result + ( + lambda a, _b: 1 / 0 if a == "two" else a, + ("one", "two"), + 0, + 1, + "one", + None, + ), + + # simulate a slow response to verify correct order ( lambda x, _: event.wait(1) if x != "two" else x, ("one", "two"), @@ -315,6 +345,8 @@ class TestDualStack: "two", None, ), + + # simulate a slow response to verify correct order ( lambda x, _: event.wait(1) if x != "tri" else x, ("one", "two", "tri"), @@ -323,7 +355,6 @@ class TestDualStack: "tri", None, ), - # TODO: add httpretty tests ], ) def test_dual_stack( @@ -336,6 +367,7 @@ def test_dual_stack( expected_exc, ): """Assert various failure modes behave as expected""" + event.clear() gen = partial( dual_stack, @@ -352,27 +384,21 @@ def test_dual_stack( _, result = assert_time(gen) assert expected_val == result event.set() - event.clear() def test_dual_stack_staggered(self): - """Assert expected sleeps occur""" - event = Event() - with mock.patch("time.sleep", side_effect=event.wait) as sleep_m: + """Assert expected calls occur""" + stagger = 0.1 + with mock.patch(M_PATH + "_run_func_with_delay") as delay_func: dual_stack( - lambda x, _: x, + lambda x, _y: x, ["you", "and", "me", "and", "dog"], - stagger_delay=1, + stagger_delay=stagger, timeout=1, ) - sleep_m.assert_has_calls( - [ - call(1), - call(2), - call(3), - call(4), - ] - ) - event.set() + for delay, call in enumerate(delay_func.call_args_list): + _, kwargs = call + assert stagger * delay == kwargs.get("delay") + assert 5 == delay_func.call_count ADDR1 = "https://addr1/" @@ -416,6 +442,7 @@ def test_order(self, addresses, expected_address_index, response): Subsequent tests will continue execution after the first response is received. """ + self.event.clear() for address in set(addresses): responses.add_callback( responses.GET, @@ -438,7 +465,6 @@ def test_order(self, addresses, expected_address_index, response): async_delay=0.0, ) self.event.set() - self.event.clear() # Test for timeout (no responding endpoint) assert addresses[expected_address_index] == url @@ -448,6 +474,7 @@ def test_order(self, addresses, expected_address_index, response): def test_timeout(self): """If no endpoint responds in time, expect no response""" + self.event.clear() addresses = [SLEEP1, SLEEP2] for address in set(addresses): responses.add_callback( @@ -470,7 +497,6 @@ def test_timeout(self): async_delay=0, ) self.event.set() - self.event.clear() assert not url assert not response_contents From 50cb1db99bdb692572169eb4703b0e8000b52061 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 12:57:09 -0600 Subject: [PATCH 45/54] Force readurl to hit the retry loop (requires failures > number of configured addresses) --- cloudinit/url_helper.py | 26 +++++++++++--------------- tests/unittests/sources/test_ec2.py | 25 +++++++++++++++++++++++-- tests/unittests/test_url_helper.py | 14 +++++--------- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 003f71d955d..e669a7e15f9 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -11,15 +11,9 @@ import copy import json import os -import time import threading -from concurrent.futures import ( - ALL_COMPLETED, - ThreadPoolExecutor, - TimeoutError, - as_completed, - wait, -) +import time +from concurrent.futures import ThreadPoolExecutor, TimeoutError, as_completed from email.utils import parsedate from errno import ENOENT from functools import partial @@ -373,9 +367,9 @@ def _cb(url): raise excps[-1] + def _run_func_with_delay(func, addr, timeout, event, delay=None): - """Execute func with optional delay - """ + """Execute func with optional delay""" if delay: # event returns True iff the flag is set to true: indicating that @@ -385,6 +379,7 @@ def _run_func_with_delay(func, addr, timeout, event, delay=None): return return func(addr, timeout) + def dual_stack( func: Callable[..., Any], addresses: List[str], @@ -404,7 +399,6 @@ def dual_stack( exceptions = [] is_done = threading.Event() - # future work: add cancel_futures to Python stdlib ThreadPoolExecutor # context manager implementation # @@ -424,7 +418,7 @@ def dual_stack( for i, addr in enumerate(addresses) } - # handle the first function to complete from the threadpool executor + # handle returned requests in order of completion for future in as_completed(futures, timeout=timeout): returned_address = futures[future] @@ -444,8 +438,9 @@ def dual_stack( # debugging if last_exception: LOG.warning( - "Exception(s) %s during request to %s, raising last exception", - ' '.join(exceptions), + "Exception(s) %s during request to %s, ", + "raising last exception", + " ".join(exceptions), returned_address, ) raise last_exception @@ -459,7 +454,8 @@ def dual_stack( "Timed out waiting for addresses: %s, " "exception(s) raised while waiting: %s", " ".join(addresses), - " ".join(exceptions)) + " ".join(exceptions), + ) return (returned_address, return_result) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 9616fce2b4d..fbc5ffa6fec 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -665,12 +665,33 @@ def test_aws_inaccessible_imds_service_fails_with_retries(self): mock_success.ok.return_value = True with mock.patch("cloudinit.url_helper.readurl") as m_readurl: - m_readurl.side_effect = (conn_error, conn_error, mock_success) + # yikes, this endpoint needs help + m_readurl.side_effect = ( + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + conn_error, + mock_success, + ) with mock.patch("cloudinit.url_helper.time.sleep"): self.assertTrue(ds.wait_for_metadata_service()) # Just one /latest/api/token request - self.assertEqual(3, len(m_readurl.call_args_list)) + self.assertEqual(19, len(m_readurl.call_args_list)) for readurl_call in m_readurl.call_args_list: self.assertIn("latest/api/token", readurl_call[0][0]) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 04fb5b360f6..3d49e149681 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -4,7 +4,6 @@ from functools import partial from threading import Event from time import process_time -from unittest.mock import call import httpretty import pytest @@ -292,10 +291,8 @@ class TestDualStack: [ # Assert order based on timeout (lambda x, _: x, ("one", "two"), 1, 1, "one", None), - # Assert timeout results in (None, None) (lambda _a, _b: event.wait(1), ("one", "two"), 1, 0, None, None), - # Assert that exception in func is raised if all threads # raise exception # currently if all threads experience exception @@ -309,7 +306,6 @@ class TestDualStack: None, ZeroDivisionError, ), - # Verify "best effort behavior" # dual_stack will temporarily ignore an exception in any of the # request threads in hopes that a later thread will succeed @@ -324,7 +320,6 @@ class TestDualStack: "two", None, ), - # Assert that exception in func is only raised # if neither thread gets a valid result ( @@ -335,7 +330,6 @@ class TestDualStack: "one", None, ), - # simulate a slow response to verify correct order ( lambda x, _: event.wait(1) if x != "two" else x, @@ -345,7 +339,6 @@ class TestDualStack: "two", None, ), - # simulate a slow response to verify correct order ( lambda x, _: event.wait(1) if x != "tri" else x, @@ -395,8 +388,11 @@ def test_dual_stack_staggered(self): stagger_delay=stagger, timeout=1, ) - for delay, call in enumerate(delay_func.call_args_list): - _, kwargs = call + + # ensure that stagger delay for each subsequent call is: + # [ 0 * N, 1 * N, 2 * N, 3 * N, 4 * N, 5 * N] where N = stagger + for delay, call_item in enumerate(delay_func.call_args_list): + _, kwargs = call_item assert stagger * delay == kwargs.get("delay") assert 5 == delay_func.call_count From 89638f4ac4f3f48411ce58c1a3d5eef3b407647f Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 13:59:20 -0600 Subject: [PATCH 46/54] 8bit diff --- cloudinit/url_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index e669a7e15f9..289726a34e7 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -438,7 +438,7 @@ def dual_stack( # debugging if last_exception: LOG.warning( - "Exception(s) %s during request to %s, ", + "Exception(s) %s during request to %s, " "raising last exception", " ".join(exceptions), returned_address, From e8c9f67b4ea3fd09d0284286b4cc95ba0792ddb2 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 15:29:29 -0600 Subject: [PATCH 47/54] Make the pythons happy: - type hints - make sure response gets sent to optional time callback, because that's what it used to do - don't assign lamda functions to kwargs, who does that --- cloudinit/url_helper.py | 51 ++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 289726a34e7..f68c5d2b58e 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -195,7 +195,7 @@ def readurl( session=None, infinite=False, log_req_resp=True, - request_method=None, + request_method="", ) -> UrlResponse: """Wrapper around requests.Session to read the url and retry if necessary @@ -368,7 +368,13 @@ def _cb(url): raise excps[-1] -def _run_func_with_delay(func, addr, timeout, event, delay=None): +def _run_func_with_delay( + func: Callable[..., Any], + addr: str, + timeout: int, + event: threading.Event, + delay: float = None, +) -> Any: """Execute func with optional delay""" if delay: @@ -462,17 +468,17 @@ def dual_stack( def wait_for_url( urls, - max_wait=None, - timeout=None, - status_cb=LOG.debug, # some datasources use different log levels - headers_cb=None, - headers_redact=None, - sleep_time=1, - exception_cb=None, - sleep_time_cb=lambda _, loop_number: int(loop_number / 5) + 1, - request_method=None, - connect_synchronously=True, - async_delay=0.150, + max_wait = None, + timeout = None, + status_cb: Callable = LOG.debug, # some sources use different log levels + headers_cb: Callable = None, + headers_redact = None, + sleep_time: int = 1, + exception_cb: Callable = None, + sleep_time_cb: Callable = None, + request_method: str = "", + connect_synchronously: bool = True, + async_delay: float = 0.150, ): """ urls: a list of urls to try @@ -512,6 +518,9 @@ def wait_for_url( A value of None for max_wait will retry indefinitely. """ + def default_sleep_time(_, loop_number: int): + return int(loop_number / 5) + 1 + def timeup(max_wait, start_time): """Check if time is up based on start time and max wait""" if max_wait is None: @@ -551,7 +560,7 @@ def read_url_handle_exceptions( url, response = url_reader_cb(urls) url_exc, reason = read_url_handle_response(response, url) if not url_exc: - return (url, response.contents) + return (url, response) except UrlError as e: reason = "request error [%s]" % e url_exc = e @@ -637,15 +646,19 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): read_url_serial if connect_synchronously else read_url_parallel ) - loop_n = 0 + calculate_sleep_time = ( + default_sleep_time if not sleep_time_cb else sleep_time_cb + ) + + loop_n: int = 0 response = None while True: - sleep_time = sleep_time_cb(response, loop_n) + sleep_time = calculate_sleep_time(response, loop_n) url = do_read_url(start_time, timeout, exception_cb, status_cb) if url: - print(url) - return url + address, response = url + return (address, response.contents) if timeup(max_wait, start_time): break @@ -660,7 +673,7 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): # timeout=0.0 causes exceptions in urllib, set to None if zero timeout = int((start_time + max_wait) - time.time()) or None - LOG.warning("No response from url") + LOG.error(f"Timed out, no response from urls: {urls}") return False, None From 60e8d07170650b8a500572e76200b1c9f46d37b8 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 15:49:53 -0600 Subject: [PATCH 48/54] fmt --- cloudinit/url_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index f68c5d2b58e..020878f50fc 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -468,11 +468,11 @@ def dual_stack( def wait_for_url( urls, - max_wait = None, - timeout = None, + max_wait=None, + timeout=None, status_cb: Callable = LOG.debug, # some sources use different log levels headers_cb: Callable = None, - headers_redact = None, + headers_redact=None, sleep_time: int = 1, exception_cb: Callable = None, sleep_time_cb: Callable = None, @@ -647,7 +647,7 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): ) calculate_sleep_time = ( - default_sleep_time if not sleep_time_cb else sleep_time_cb + default_sleep_time if not sleep_time_cb else sleep_time_cb ) loop_n: int = 0 From d99febf18de8b0e6795bcc4792a1dee71b227bbe Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Fri, 8 Apr 2022 16:08:48 -0600 Subject: [PATCH 49/54] lazy logging per pylint --- cloudinit/url_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index 020878f50fc..c9989ba296c 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -673,7 +673,7 @@ def read_url_parallel(start_time, timeout, exc_cb, log_cb): # timeout=0.0 causes exceptions in urllib, set to None if zero timeout = int((start_time + max_wait) - time.time()) or None - LOG.error(f"Timed out, no response from urls: {urls}") + LOG.error("Timed out, no response from urls: %s", urls) return False, None From ead44e72a638a15feb0266b190a3839802aee654 Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Mon, 11 Apr 2022 15:25:15 -0600 Subject: [PATCH 50/54] review fixes --- cloudinit/url_helper.py | 24 +++--------------------- tests/unittests/sources/test_ec2.py | 4 ---- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index c9989ba296c..eeef484839d 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -30,11 +30,6 @@ LOG = logging.getLogger(__name__) - -# Check if requests has ssl support (added in requests >= 0.8.8) -SSL_ENABLED = False -CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0) -_REQ_VER = None REDACTED = "REDACTED" @@ -244,19 +239,6 @@ def readurl( req_args["timeout"] = max(float(timeout), 0) if headers_redact is None: headers_redact = [] - # It doesn't seem like config - # was added in older library versions (or newer ones either), thus we - # need to manually do the retries if it wasn't... - if CONFIG_ENABLED: - req_config = { - "store_cookies": False, - } - # Don't use the retry support built-in - # since it doesn't allow for 'sleep_times' - # in between tries.... - # if retries: - # req_config['max_retries'] = max(int(retries), 0) - req_args["config"] = req_config manual_tries = 1 if retries: manual_tries = max(int(retries) + 1, 1) @@ -527,7 +509,7 @@ def timeup(max_wait, start_time): return False return (max_wait <= 0) or (time.time() - start_time > max_wait) - def read_url_handle_response(response, url): + def handle_url_response(response, url): """Map requests response code/contents to internal "UrlError" type""" if not response.contents: reason = "empty response [%s]" % (response.code) @@ -558,13 +540,13 @@ def read_url_handle_exceptions( url = None try: url, response = url_reader_cb(urls) - url_exc, reason = read_url_handle_response(response, url) + url_exc, reason = handle_url_response(response, url) if not url_exc: return (url, response) except UrlError as e: reason = "request error [%s]" % e url_exc = e - except Exception as e: # yikes, do we really think this is necessary? + except Exception as e: reason = "unexpected error [%s]" % e url_exc = e time_taken = int(time.time() - start_time) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index fbc5ffa6fec..62186004cd5 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -303,10 +303,6 @@ def register_helper(register, base_url, body): def myreg(*argc, **kwargs): url, body = argc method = responses.PUT if ec2.API_TOKEN_ROUTE in url else responses.GET - print( - f"registering: len= {len(argc)}\n" - f"{argc[0]}\n{argc[1:]}\nkwargs: {kwargs}" - ) status = kwargs.get("status", 200) return responses.add(method, url, body, status=status) From 79f260fd7ae8897118bc104171d45ad6a909ee26 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 13 Apr 2022 10:33:21 -0500 Subject: [PATCH 51/54] integration test --- tests/integration_tests/clouds.py | 15 +++++-- .../integration_tests/datasources/test_ec2.py | 43 +++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 tests/integration_tests/datasources/test_ec2.py diff --git a/tests/integration_tests/clouds.py b/tests/integration_tests/clouds.py index e5fe56d1565..0e2e1deb25c 100644 --- a/tests/integration_tests/clouds.py +++ b/tests/integration_tests/clouds.py @@ -207,9 +207,18 @@ def _get_cloud_instance(self): def _perform_launch(self, launch_kwargs, **kwargs): """Use a dual-stack VPC for cloud-init integration testing.""" - launch_kwargs["vpc"] = self.cloud_instance.get_or_create_vpc( - name="ec2-cloud-init-integration" - ) + if "vpc" not in launch_kwargs: + launch_kwargs["vpc"] = self.cloud_instance.get_or_create_vpc( + name="ec2-cloud-init-integration" + ) + # Enable IPv6 metadata at http://[fd00:ec2::254] + if "Ipv6AddressCount" not in launch_kwargs: + launch_kwargs["Ipv6AddressCount"] = 1 + if "MetadataOptions" not in launch_kwargs: + launch_kwargs["MetadataOptions"] = {} + if "HttpProtocolIpv6" not in launch_kwargs["MetadataOptions"]: + launch_kwargs["MetadataOptions"] = {"HttpProtocolIpv6": "enabled"} + pycloudlib_instance = self.cloud_instance.launch(**launch_kwargs) return pycloudlib_instance diff --git a/tests/integration_tests/datasources/test_ec2.py b/tests/integration_tests/datasources/test_ec2.py new file mode 100644 index 00000000000..8cde4dc9e08 --- /dev/null +++ b/tests/integration_tests/datasources/test_ec2.py @@ -0,0 +1,43 @@ +import re + +import pytest + +from tests.integration_tests.instances import IntegrationInstance + + +def _test_crawl(client, ip): + assert client.execute("cloud-init clean --logs").ok + assert client.execute("cloud-init init --local").ok + log = client.read_from_file("/var/log/cloud-init.log") + assert f"Using metadata source: '{ip}'" in log + result = re.findall( + r"Crawl of metadata service took (\d+.\d+) seconds", log + ) + if len(result) != 1: + pytest.fail(f"Expected 1 metadata crawl time, got {result}") + # 20 would still be a crazy long time for metadata service to crawl, + # but it's short enough to know we're not waiting for a response + assert float(result[0]) < 20 + + +@pytest.mark.ec2 +def test_dual_stack(client: IntegrationInstance): + # Drop IPv4 responses + assert client.execute("iptables -I INPUT -s 169.254.169.254 -j DROP").ok + _test_crawl(client, "http://[fd00:ec2::254]") + + # Block IPv4 requests + assert client.execute("iptables -I OUTPUT -d 169.254.169.254 -j REJECT").ok + _test_crawl(client, "http://[fd00:ec2::254]") + + # Re-enable IPv4 + assert client.execute("iptables -D OUTPUT -d 169.254.169.254 -j REJECT").ok + assert client.execute("iptables -D INPUT -s 169.254.169.254 -j DROP").ok + + # Drop IPv6 responses + assert client.execute("ip6tables -I INPUT -s fd00:ec2::254 -j DROP").ok + _test_crawl(client, "http://169.254.169.254") + + # Block IPv6 requests + assert client.execute("ip6tables -I OUTPUT -d fd00:ec2::254 -j REJECT").ok + _test_crawl(client, "http://169.254.169.254") From 8b03d30ecb951792ddd0d87ff6811f52766b71b1 Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 13 Apr 2022 11:19:48 -0500 Subject: [PATCH 52/54] bump pycloudlib --- integration-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-requirements.txt b/integration-requirements.txt index 8329eeecefb..ad41a82946c 100644 --- a/integration-requirements.txt +++ b/integration-requirements.txt @@ -1,5 +1,5 @@ # PyPI requirements for cloud-init integration testing # https://cloudinit.readthedocs.io/en/latest/topics/integration_tests.html # -pycloudlib @ git+https://github.com/canonical/pycloudlib.git@44206bb95c49901d994c9eb772eba07f2a1b6661 +pycloudlib @ git+https://github.com/canonical/pycloudlib.git@a2fde24361eeb6ad96db2a1ccb5cd70c7d76aa7f pytest From a5fa33689e5302b9e4c254596314c759868485bb Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 13 Apr 2022 15:11:04 -0600 Subject: [PATCH 53/54] do not wait for timeout --- cloudinit/url_helper.py | 96 +++++++++++++++--------------- tests/unittests/test_url_helper.py | 5 +- 2 files changed, 52 insertions(+), 49 deletions(-) diff --git a/cloudinit/url_helper.py b/cloudinit/url_helper.py index eeef484839d..ebb6bc4a0ef 100644 --- a/cloudinit/url_helper.py +++ b/cloudinit/url_helper.py @@ -392,58 +392,60 @@ def dual_stack( # # for now we don't use this feature since it only supports python >3.8 # and doesn't provide a context manager and only marginal benefit - with ThreadPoolExecutor(max_workers=len(addresses)) as executor: - try: - futures = { - executor.submit( - _run_func_with_delay, - func=func, - addr=addr, - timeout=timeout, - event=is_done, - delay=(i * stagger_delay), - ): addr - for i, addr in enumerate(addresses) - } - - # handle returned requests in order of completion - for future in as_completed(futures, timeout=timeout): - - returned_address = futures[future] - return_exception = future.exception() - if return_exception: - last_exception = return_exception - else: - return_result = future.result() - if return_result: - - # communicate to other threads that they do not need to - # try: this thread has already succeeded - is_done.set() - return (returned_address, return_result) - - # No success, return the last exception but log them all for - # debugging - if last_exception: - LOG.warning( - "Exception(s) %s during request to %s, " - "raising last exception", - " ".join(exceptions), - returned_address, - ) - raise last_exception + executor = ThreadPoolExecutor(max_workers=len(addresses)) + try: + futures = { + executor.submit( + _run_func_with_delay, + func=func, + addr=addr, + timeout=timeout, + event=is_done, + delay=(i * stagger_delay), + ): addr + for i, addr in enumerate(addresses) + } + + # handle returned requests in order of completion + for future in as_completed(futures, timeout=timeout): + + returned_address = futures[future] + return_exception = future.exception() + if return_exception: + last_exception = return_exception else: - LOG.error("Empty result for address %s", returned_address) - raise ValueError("No result returned") + return_result = future.result() + if return_result: - # when max_wait expires, log but don't throw (retries happen) - except TimeoutError: + # communicate to other threads that they do not need to + # try: this thread has already succeeded + is_done.set() + return (returned_address, return_result) + + # No success, return the last exception but log them all for + # debugging + if last_exception: LOG.warning( - "Timed out waiting for addresses: %s, " - "exception(s) raised while waiting: %s", - " ".join(addresses), + "Exception(s) %s during request to %s, " + "raising last exception", " ".join(exceptions), + returned_address, ) + raise last_exception + else: + LOG.error("Empty result for address %s", returned_address) + raise ValueError("No result returned") + + # when max_wait expires, log but don't throw (retries happen) + except TimeoutError: + LOG.warning( + "Timed out waiting for addresses: %s, " + "exception(s) raised while waiting: %s", + " ".join(addresses), + " ".join(exceptions), + ) + finally: + executor.shutdown(wait=False) return (returned_address, return_result) diff --git a/tests/unittests/test_url_helper.py b/tests/unittests/test_url_helper.py index 3d49e149681..5e09221938c 100644 --- a/tests/unittests/test_url_helper.py +++ b/tests/unittests/test_url_helper.py @@ -379,7 +379,7 @@ def test_dual_stack( event.set() def test_dual_stack_staggered(self): - """Assert expected calls occur""" + """Assert expected call intervals occur""" stagger = 0.1 with mock.patch(M_PATH + "_run_func_with_delay") as delay_func: dual_stack( @@ -391,10 +391,11 @@ def test_dual_stack_staggered(self): # ensure that stagger delay for each subsequent call is: # [ 0 * N, 1 * N, 2 * N, 3 * N, 4 * N, 5 * N] where N = stagger + # it appears that without an explicit wait/join we can't assert + # number of calls for delay, call_item in enumerate(delay_func.call_args_list): _, kwargs = call_item assert stagger * delay == kwargs.get("delay") - assert 5 == delay_func.call_count ADDR1 = "https://addr1/" From 58e8cf3bf1ecaa2137795d25fe1106e47d24fc3c Mon Sep 17 00:00:00 2001 From: Brett Holman Date: Wed, 13 Apr 2022 16:34:38 -0600 Subject: [PATCH 54/54] undo test change --- tests/unittests/sources/test_ec2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unittests/sources/test_ec2.py b/tests/unittests/sources/test_ec2.py index 62186004cd5..a192bd0488f 100644 --- a/tests/unittests/sources/test_ec2.py +++ b/tests/unittests/sources/test_ec2.py @@ -728,8 +728,8 @@ def test_aws_token_redacted(self): logs_with_redacted_ttl = [log for log in all_logs if REDACT_TTL in log] logs_with_redacted = [log for log in all_logs if REDACT_TOK in log] logs_with_token = [log for log in all_logs if "API-TOKEN" in log] - assert len(logs_with_redacted_ttl) - assert 80 < len(logs_with_redacted) + self.assertEqual(1, len(logs_with_redacted_ttl)) + self.assertEqual(83, len(logs_with_redacted)) self.assertEqual(0, len(logs_with_token)) @responses.activate