From 52c4bb42f8f03cea0cbb4a7673c1b46a25047e69 Mon Sep 17 00:00:00 2001 From: Sebastian Rittau Date: Fri, 11 Apr 2025 13:08:47 +0200 Subject: [PATCH 1/5] Simplify and fix `urllib.parse.urlencode()` Remove overloads and type vars. Introduce a protocol for the `quote_via` argument. This means that the interface accepted by the supplied `quote_via` is stricter, and is not dependent on the actual supplied types in the `query` argument, but must work with all possible query types. Closes: #4234 --- .../@tests/test_cases/urllib/check_parse.py | 10 +++++ stdlib/urllib/parse.pyi | 44 ++++++++----------- 2 files changed, 28 insertions(+), 26 deletions(-) create mode 100644 stdlib/@tests/test_cases/urllib/check_parse.py diff --git a/stdlib/@tests/test_cases/urllib/check_parse.py b/stdlib/@tests/test_cases/urllib/check_parse.py new file mode 100644 index 000000000000..fbb35d8bd3ae --- /dev/null +++ b/stdlib/@tests/test_cases/urllib/check_parse.py @@ -0,0 +1,10 @@ +from urllib.parse import quote, quote_plus, urlencode + +urlencode({"a": "b"}, quote_via=quote) +urlencode({b"a": b"b"}, quote_via=quote) +urlencode({"a": b"b"}, quote_via=quote) +urlencode({b"a": "b"}, quote_via=quote) +mixed_dict: dict[str | bytes, str | bytes] = {} +urlencode(mixed_dict, quote_via=quote) + +urlencode({"a": "b"}, quote_via=quote_plus) diff --git a/stdlib/urllib/parse.pyi b/stdlib/urllib/parse.pyi index f2fae0c3d402..88f357b53d50 100644 --- a/stdlib/urllib/parse.pyi +++ b/stdlib/urllib/parse.pyi @@ -1,7 +1,7 @@ import sys -from collections.abc import Callable, Iterable, Mapping, Sequence +from collections.abc import Iterable, Mapping, Sequence from types import GenericAlias -from typing import Any, AnyStr, Generic, Literal, NamedTuple, TypeVar, overload +from typing import Any, AnyStr, Generic, Literal, NamedTuple, Protocol, overload from typing_extensions import TypeAlias __all__ = [ @@ -132,38 +132,30 @@ def urldefrag(url: str) -> DefragResult: ... @overload def urldefrag(url: bytes | bytearray | None) -> DefragResultBytes: ... -_Q = TypeVar("_Q", bound=str | Iterable[int]) _QueryType: TypeAlias = ( - Mapping[Any, Any] | Mapping[Any, Sequence[Any]] | Sequence[tuple[Any, Any]] | Sequence[tuple[Any, Sequence[Any]]] + Mapping[str, str | bytes] + | Mapping[bytes, str | bytes] + | Mapping[str | bytes, str | bytes] + | Mapping[str, Sequence[str | bytes]] + | Mapping[bytes, Sequence[str | bytes]] + | Mapping[str | bytes, Sequence[str | bytes]] + | Sequence[tuple[str | bytes, str | bytes]] + | Sequence[tuple[str | bytes, Sequence[str | bytes]]] ) -@overload -def urlencode( - query: _QueryType, - doseq: bool = False, - safe: str = "", - encoding: str | None = None, - errors: str | None = None, - quote_via: Callable[[AnyStr, str, str, str], str] = ..., -) -> str: ... -@overload -def urlencode( - query: _QueryType, - doseq: bool, - safe: _Q, - encoding: str | None = None, - errors: str | None = None, - quote_via: Callable[[AnyStr, _Q, str, str], str] = ..., -) -> str: ... -@overload +class _QuoteVia(Protocol): + @overload + def __call__(self, string: str, safe: str | bytes, encoding: str, errors: str) -> str: ... + @overload + def __call__(self, string: bytes, safe: str | bytes) -> str: ... + def urlencode( query: _QueryType, doseq: bool = False, - *, - safe: _Q, + safe: str | bytes = "", encoding: str | None = None, errors: str | None = None, - quote_via: Callable[[AnyStr, _Q, str, str], str] = ..., + quote_via: _QuoteVia = ..., ) -> str: ... def urljoin(base: AnyStr, url: AnyStr | None, allow_fragments: bool = True) -> AnyStr: ... @overload From f2767b6870d187e4674f536248f4dec23301cc96 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 11 Apr 2025 11:10:38 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks --- stdlib/@tests/test_cases/urllib/check_parse.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stdlib/@tests/test_cases/urllib/check_parse.py b/stdlib/@tests/test_cases/urllib/check_parse.py index fbb35d8bd3ae..f464f6341fdc 100644 --- a/stdlib/@tests/test_cases/urllib/check_parse.py +++ b/stdlib/@tests/test_cases/urllib/check_parse.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from urllib.parse import quote, quote_plus, urlencode urlencode({"a": "b"}, quote_via=quote) From b59bc51128402fe8dc177786c114092e8b6b152d Mon Sep 17 00:00:00 2001 From: Sebastian Rittau Date: Fri, 11 Apr 2025 13:34:17 +0200 Subject: [PATCH 3/5] Change values from str | bytes to object --- stdlib/urllib/parse.pyi | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/stdlib/urllib/parse.pyi b/stdlib/urllib/parse.pyi index 88f357b53d50..11ffd5a257eb 100644 --- a/stdlib/urllib/parse.pyi +++ b/stdlib/urllib/parse.pyi @@ -132,15 +132,16 @@ def urldefrag(url: str) -> DefragResult: ... @overload def urldefrag(url: bytes | bytearray | None) -> DefragResultBytes: ... +# The values are passed to `str()` (unless they are bytes), so anything is valid. _QueryType: TypeAlias = ( - Mapping[str, str | bytes] - | Mapping[bytes, str | bytes] - | Mapping[str | bytes, str | bytes] - | Mapping[str, Sequence[str | bytes]] - | Mapping[bytes, Sequence[str | bytes]] - | Mapping[str | bytes, Sequence[str | bytes]] - | Sequence[tuple[str | bytes, str | bytes]] - | Sequence[tuple[str | bytes, Sequence[str | bytes]]] + Mapping[str, object] + | Mapping[bytes, object] + | Mapping[str | bytes, object] + | Mapping[str, Sequence[object]] + | Mapping[bytes, Sequence[object]] + | Mapping[str | bytes, Sequence[object]] + | Sequence[tuple[str | bytes, object]] + | Sequence[tuple[str | bytes, Sequence[object]]] ) class _QuoteVia(Protocol): From e36eabd5b416608471f30b107f9045947a318dea Mon Sep 17 00:00:00 2001 From: Sebastian Rittau Date: Fri, 11 Apr 2025 13:51:52 +0200 Subject: [PATCH 4/5] Use @type_check_only --- stdlib/urllib/parse.pyi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stdlib/urllib/parse.pyi b/stdlib/urllib/parse.pyi index 11ffd5a257eb..03a7946e9439 100644 --- a/stdlib/urllib/parse.pyi +++ b/stdlib/urllib/parse.pyi @@ -1,7 +1,7 @@ import sys from collections.abc import Iterable, Mapping, Sequence from types import GenericAlias -from typing import Any, AnyStr, Generic, Literal, NamedTuple, Protocol, overload +from typing import Any, AnyStr, Generic, Literal, NamedTuple, Protocol, overload, type_check_only from typing_extensions import TypeAlias __all__ = [ @@ -132,7 +132,7 @@ def urldefrag(url: str) -> DefragResult: ... @overload def urldefrag(url: bytes | bytearray | None) -> DefragResultBytes: ... -# The values are passed to `str()` (unless they are bytes), so anything is valid. +# The values are passed through `str()` (unless they are bytes), so anything is valid. _QueryType: TypeAlias = ( Mapping[str, object] | Mapping[bytes, object] @@ -144,6 +144,7 @@ _QueryType: TypeAlias = ( | Sequence[tuple[str | bytes, Sequence[object]]] ) +@type_check_only class _QuoteVia(Protocol): @overload def __call__(self, string: str, safe: str | bytes, encoding: str, errors: str) -> str: ... From 685cd50371747be68f54fdcf14d06942cb0a2fb6 Mon Sep 17 00:00:00 2001 From: Sebastian Rittau Date: Mon, 14 Apr 2025 10:57:47 +0200 Subject: [PATCH 5/5] Make protocol args pos-only --- stdlib/urllib/parse.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/urllib/parse.pyi b/stdlib/urllib/parse.pyi index 03a7946e9439..a5ed616d25af 100644 --- a/stdlib/urllib/parse.pyi +++ b/stdlib/urllib/parse.pyi @@ -147,9 +147,9 @@ _QueryType: TypeAlias = ( @type_check_only class _QuoteVia(Protocol): @overload - def __call__(self, string: str, safe: str | bytes, encoding: str, errors: str) -> str: ... + def __call__(self, string: str, safe: str | bytes, encoding: str, errors: str, /) -> str: ... @overload - def __call__(self, string: bytes, safe: str | bytes) -> str: ... + def __call__(self, string: bytes, safe: str | bytes, /) -> str: ... def urlencode( query: _QueryType,