diff --git a/httpx/utils.py b/httpx/utils.py
index c16341c5f9..870c817e3e 100644
--- a/httpx/utils.py
+++ b/httpx/utils.py
@@ -14,6 +14,7 @@
 
 if typing.TYPE_CHECKING:  # pragma: no cover
     from .models import PrimitiveData
+    from .models import URL
 
 
 def normalize_header_key(value: typing.AnyStr, encoding: str = None) -> bytes:
@@ -212,6 +213,28 @@ def kv_format(**kwargs: typing.Any) -> str:
     return " ".join(f"{key}={value!r}" for key, value in kwargs.items())
 
 
+def should_not_be_proxied(url: "URL") -> bool:
+    """Return True if the URL should not be proxied,
+    return False otherwise.
+    """
+    no_proxy = getproxies().get("no")
+    if not no_proxy:
+        return False
+    no_proxy_list = [host.strip() for host in no_proxy.split(",")]
+    for name in no_proxy_list:
+        if name == "*":
+            return True
+        if name:
+            name = name.lstrip(".")  # ignore leading dots
+            name = re.escape(name)
+            pattern = r"(.+\.)?%s$" % name
+            if re.match(pattern, url.host, re.I) or re.match(
+                pattern, url.authority, re.I
+            ):
+                return True
+    return False
+
+
 def get_environment_proxies() -> typing.Dict[str, str]:
     """Gets proxy information from the environment"""
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 800928c993..4ca7acb3f7 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -14,6 +14,7 @@
     guess_json_utf,
     obfuscate_sensitive_headers,
     parse_header_links,
+    should_not_be_proxied,
 )
 
 
@@ -201,3 +202,79 @@ def test_obfuscate_sensitive_headers(headers, output):
     bytes_output = [(k.encode(), v.encode()) for k, v in output]
     assert list(obfuscate_sensitive_headers(headers)) == output
     assert list(obfuscate_sensitive_headers(bytes_headers)) == bytes_output
+
+
+@pytest.mark.parametrize(
+    ["url", "no_proxy", "expected"],
+    [
+        (
+            "http://127.0.0.1",
+            {"NO_PROXY": ""},
+            False,
+        ),  # everything is proxied when no_proxy is empty/unset
+        (
+            "http://127.0.0.1",
+            {"NO_PROXY": "127.0.0.1"},
+            True,
+        ),  # a no_proxy entry given as an IP is matched
+        (
+            "http://127.0.0.1",
+            {"NO_PROXY": "https://127.0.0.1"},
+            False,
+        ),  # a no_proxy entry with a scheme is ignored
+        (
+            "http://127.0.0.1",
+            {"NO_PROXY": "1.1.1.1"},
+            False,
+        ),  # a different no_proxy host means it's still proxied
+        (
+            "http://courses.mit.edu",
+            {"NO_PROXY": "mit.edu"},
+            True,
+        ),  # no_proxy also matches sub-domains
+        (
+            "https://mit.edu.info",
+            {"NO_PROXY": "mit.edu"},
+            False,
+        ),  # the domain is actually edu.info, so it should be proxied
+        (
+            "https://mit.edu.info",
+            {"NO_PROXY": "mit.edu,edu.info"},
+            True,
+        ),  # list in no_proxy, matches the second domain
+        (
+            "https://mit.edu.info",
+            {"NO_PROXY": "mit.edu, edu.info"},
+            True,
+        ),  # list with spaces in no_proxy
+        (
+            "https://mit.edu.info",
+            {"NO_PROXY": "mit.edu,mit.info"},
+            False,
+        ),  # list in no_proxy without any matching domain
+        (
+            "https://foo.example.com",
+            {"NO_PROXY": "www.example.com"},
+            False,
+        ),  # different sub-domains (foo vs www) means we still proxy
+        (
+            "https://www.example1.com",
+            {"NO_PROXY": ".example1.com"},
+            True,
+        ),  # a no_proxy entry starting with a dot
+        (
+            "https://www.example2.com",
+            {"NO_PROXY": "ample2.com"},
+            False,
+        ),  # only whole domains match, never substrings
+        (
+            "https://www.example3.com",
+            {"NO_PROXY": "*"},
+            True,
+        ),  # wildcard * means nothing is proxied
+    ],
+)
+def test_should_not_be_proxied(url, no_proxy, expected):
+    os.environ.update(no_proxy)
+    parsed_url = httpx.models.URL(url)
+    assert should_not_be_proxied(parsed_url) == expected
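
Below is a minimal standalone sketch of the suffix-matching rule the new helper implements, for illustration only; it is not part of the patch. The matches_no_proxy name is hypothetical, and the sketch takes a bare host string instead of reading NO_PROXY via urllib.request.getproxies() the way should_not_be_proxied does:

import re

def matches_no_proxy(host: str, no_proxy: str) -> bool:
    # Hypothetical helper mirroring the patch's matching rule: an entry
    # matches the host itself or any sub-domain of it; "*" matches all.
    for name in (entry.strip() for entry in no_proxy.split(",")):
        if name == "*":
            return True
        if name:
            name = name.lstrip(".")  # ".example.com" behaves like "example.com"
            pattern = r"(.+\.)?%s$" % re.escape(name)
            # re.match anchors at the start, and the pattern anchors at the
            # end, so only whole domain labels match, never substrings.
            if re.match(pattern, host, re.I):
                return True
    return False

assert matches_no_proxy("courses.mit.edu", "mit.edu")          # sub-domain matches
assert not matches_no_proxy("mit.edu.info", "mit.edu")         # a bare suffix does not
assert not matches_no_proxy("www.example2.com", "ample2.com")  # no substring matches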