From aa456789f550891cb1f9492201a20173b4ba78d1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 12:52:12 +0000 Subject: [PATCH 1/7] Rename `_default` and add docstring --- src/canonicaljson/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py index 2e33b66..69d0e3e 100644 --- a/src/canonicaljson/__init__.py +++ b/src/canonicaljson/__init__.py @@ -31,7 +31,11 @@ __version__ = "1.6.5" -def _default(obj: object) -> object: # pragma: no cover +def _preprocess_for_serialisation(obj: object) -> object: # pragma: no cover + """Transform an `obj` into something the JSON library knows how to encode. + + This is only called for types that the JSON library does not recognise. + """ if type(obj) is frozendict_type: # If frozendict is available and used, cast `obj` into a dict return dict(obj) # type: ignore[call-overload] @@ -77,7 +81,7 @@ def set_json_library(json_lib: JsonLibrary) -> None: allow_nan=False, separators=(",", ":"), sort_keys=True, - default=_default, + default=_preprocess_for_serialisation, ) global _pretty_encoder @@ -86,7 +90,7 @@ def set_json_library(json_lib: JsonLibrary) -> None: allow_nan=False, indent=4, sort_keys=True, - default=_default, + default=_preprocess_for_serialisation, ) From 443a3b365461bc660bd003ad26793e38d45411df Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 13:29:35 +0000 Subject: [PATCH 2/7] Remove frozendict support --- README.rst | 1 - setup.cfg | 6 ------ src/canonicaljson/__init__.py | 10 +--------- tests/test_canonicaljson.py | 17 ----------------- tox.ini | 1 - 5 files changed, 1 insertion(+), 34 deletions(-) diff --git a/README.rst b/README.rst index af0c212..2190056 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,6 @@ Features U+0056, to keep the output as small as possible. * Uses the shortest escape sequence for each escaped character. * Encodes the JSON as UTF-8. -* Can encode ``frozendict`` immutable dictionaries. Supports Python versions 3.7 and newer. diff --git a/setup.cfg b/setup.cfg index 4b707de..60417f4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,12 +34,6 @@ install_requires = typing_extensions>=4.0.0; python_version < '3.8' -[options.extras_require] -# frozendict support can be enabled using the `canonicaljson[frozendict]` syntax -frozendict = - frozendict>=1.0 - - [options.package_data] canonicaljson = py.typed diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py index 69d0e3e..567bca5 100644 --- a/src/canonicaljson/__init__.py +++ b/src/canonicaljson/__init__.py @@ -15,18 +15,13 @@ # limitations under the License. import platform -from typing import Any, Generator, Iterator, Optional, Type +from typing import Any, Generator, Iterator, Type try: from typing import Protocol except ImportError: # pragma: no cover from typing_extensions import Protocol # type: ignore[assignment] -frozendict_type: Optional[Type[Any]] -try: - from frozendict import frozendict as frozendict_type -except ImportError: - frozendict_type = None # pragma: no cover __version__ = "1.6.5" @@ -36,9 +31,6 @@ def _preprocess_for_serialisation(obj: object) -> object: # pragma: no cover This is only called for types that the JSON library does not recognise. """ - if type(obj) is frozendict_type: - # If frozendict is available and used, cast `obj` into a dict - return dict(obj) # type: ignore[call-overload] raise TypeError( "Object of type %s is not JSON serializable" % obj.__class__.__name__ ) diff --git a/tests/test_canonicaljson.py b/tests/test_canonicaljson.py index f1fac9a..eab4afe 100644 --- a/tests/test_canonicaljson.py +++ b/tests/test_canonicaljson.py @@ -19,7 +19,6 @@ from canonicaljson import ( encode_canonical_json, encode_pretty_printed_json, - frozendict_type, iterencode_canonical_json, iterencode_pretty_printed_json, set_json_library, @@ -107,22 +106,6 @@ def test_encode_pretty_printed(self) -> None: b'{\n "la merde amus\xc3\xa9e": "\xF0\x9F\x92\xA9"\n}', ) - @unittest.skipIf( - frozendict_type is None, - "If `frozendict` is not available, skip test", - ) - def test_frozen_dict(self) -> None: - # For mypy's benefit: - assert frozendict_type is not None - self.assertEqual( - encode_canonical_json(frozendict_type({"a": 1})), - b'{"a":1}', - ) - self.assertEqual( - encode_pretty_printed_json(frozendict_type({"a": 1})), - b'{\n "a": 1\n}', - ) - def test_unknown_type(self) -> None: class Unknown(object): pass diff --git a/tox.ini b/tox.ini index a893107..63b9d58 100644 --- a/tox.ini +++ b/tox.ini @@ -33,7 +33,6 @@ commands = python -m black --check --diff src tests [testenv:mypy] deps = mypy==1.0 - types-frozendict==2.0.8 types-simplejson==3.17.5 types-setuptools==57.4.14 commands = mypy src tests From f76e8950a839ba09c5abec095333c924fa818f90 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 13:02:00 +0000 Subject: [PATCH 3/7] Add serisalisation registration hook --- README.rst | 18 ++++++++++++++++++ src/canonicaljson/__init__.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2190056..f8be589 100644 --- a/README.rst +++ b/README.rst @@ -15,6 +15,7 @@ Features U+0056, to keep the output as small as possible. * Uses the shortest escape sequence for each escaped character. * Encodes the JSON as UTF-8. +* Can be configured to encode custom types unknown to the stdlib JSON encoder. Supports Python versions 3.7 and newer. @@ -58,3 +59,20 @@ The underlying JSON implementation can be chosen with the following: which uses the standard library json module). .. _simplejson: https://simplejson.readthedocs.io/ + +A preserialisation hook allows you to encode objects which aren't encodable by the +standard library JSONEncoder. + +.. code:: python + + import canonicaljson + from typing import Dict + + class CustomType: + pass + + def callback(c: CustomType) -> Dict[str, str]: + return {"Hello": "world!"} + + canonicaljson.register_preserialisation_callback(CustomType, callback) + assert canonicaljson.encode_canonical_json(CustomType()) == b'{"Hello":"world!"}' diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py index 567bca5..24ed332 100644 --- a/src/canonicaljson/__init__.py +++ b/src/canonicaljson/__init__.py @@ -13,9 +13,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import functools import platform -from typing import Any, Generator, Iterator, Type +from typing import Any, Callable, Generator, Iterator, Type, TypeVar try: from typing import Protocol @@ -26,6 +26,7 @@ __version__ = "1.6.5" +@functools.singledispatch def _preprocess_for_serialisation(obj: object) -> object: # pragma: no cover """Transform an `obj` into something the JSON library knows how to encode. @@ -36,6 +37,32 @@ def _preprocess_for_serialisation(obj: object) -> object: # pragma: no cover ) +T = TypeVar("T") + + +def register_preserialisation_callback( + data_type: Type[T], callback: Callable[[T], object] +) -> None: + """ + Register a `callback` to preprocess `data_type` objects unknown to the JSON encoder. + + When canonicaljson encodes an object `x` at runtime that its JSON library does not + know how to encode, it will + - select a `callback`, + - compute `y = callback(x)`, then + - JSON-encode `y` and return the result. + + The `callback` should return an object that is JSON-serialisable by the stdlib + json module. + + If this is called multiple times with the same `data_type`, the most recently + registered callback is used when serialising that `data_type`. + """ + if data_type is object: + raise ValueError("Cannot register callback for the `object` type") + _preprocess_for_serialisation.register(data_type, callback) + + class Encoder(Protocol): # pragma: no cover def encode(self, data: object) -> str: pass From 060369ef7b8416c0f7898647c5ab50508c5de238 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 12:53:29 +0000 Subject: [PATCH 4/7] Tests --- tests/test_canonicaljson.py | 39 +++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/test_canonicaljson.py b/tests/test_canonicaljson.py index eab4afe..80e935f 100644 --- a/tests/test_canonicaljson.py +++ b/tests/test_canonicaljson.py @@ -13,6 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest.mock import Mock from math import inf, nan @@ -22,6 +23,7 @@ iterencode_canonical_json, iterencode_pretty_printed_json, set_json_library, + register_preserialisation_callback, ) import unittest @@ -150,3 +152,40 @@ def test_set_json(self) -> None: from canonicaljson import json # type: ignore[attr-defined] set_json_library(json) + + def test_encode_unknown_class_raises(self) -> None: + class C: + pass + + with self.assertRaises(Exception): + encode_canonical_json(C()) + + def test_preserialisation_callback(self) -> None: + class C: + pass + + register_preserialisation_callback(C, lambda c: "I am a C instance") + + result = encode_canonical_json(C()) + self.assertEqual(result, b'"I am a C instance"') + + def test_cannot_register_preserialisation_callback_for_object(self) -> None: + with self.assertRaises(Exception): + register_preserialisation_callback( + object, lambda c: "shouldn't be able to do this" + ) + + def test_most_recent_preserialisation_callback_called(self) -> None: + class C: + pass + + callback1 = Mock(return_value="callback 1 was called") + callback2 = Mock(return_value="callback 2 was called") + + register_preserialisation_callback(C, callback1) + register_preserialisation_callback(C, callback2) + + encode_canonical_json(C()) + + callback1.assert_not_called() + callback2.assert_called_once() From 07dcccdc5e74280371a5d82edc4cd84b4b201a99 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 14 Mar 2023 16:08:15 +0000 Subject: [PATCH 5/7] Update README.rst Co-authored-by: Patrick Cloke --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index f8be589..03baff3 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,7 @@ The underlying JSON implementation can be chosen with the following: .. _simplejson: https://simplejson.readthedocs.io/ A preserialisation hook allows you to encode objects which aren't encodable by the -standard library JSONEncoder. +standard library ``JSONEncoder``. .. code:: python From 7856f0c41d85271987f62036d1e69f442b3332fc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 14 Mar 2023 15:00:40 -0400 Subject: [PATCH 6/7] Stop using simplejson. --- setup.cfg | 8 ---- src/canonicaljson/__init__.py | 86 +++++++---------------------------- tests/test_canonicaljson.py | 15 ------ tox.ini | 2 - 4 files changed, 16 insertions(+), 95 deletions(-) diff --git a/setup.cfg b/setup.cfg index 60417f4..060a555 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,14 +26,6 @@ package_dir = =src packages = canonicaljson -install_requires = - # simplejson versions before 3.14.0 had a bug with some characters - # (e.g. \u2028) if ensure_ascii was set to false. - simplejson>=3.14.0 - # typing.Protocol was only added to the stdlib in Python 3.8 - typing_extensions>=4.0.0; python_version < '3.8' - - [options.package_data] canonicaljson = py.typed diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py index 24ed332..5eb993a 100644 --- a/src/canonicaljson/__init__.py +++ b/src/canonicaljson/__init__.py @@ -14,13 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import functools -import platform -from typing import Any, Callable, Generator, Iterator, Type, TypeVar - -try: - from typing import Protocol -except ImportError: # pragma: no cover - from typing_extensions import Protocol # type: ignore[assignment] +import json +from typing import Callable, Generator, Type, TypeVar __version__ = "1.6.5" @@ -63,54 +58,22 @@ def register_preserialisation_callback( _preprocess_for_serialisation.register(data_type, callback) -class Encoder(Protocol): # pragma: no cover - def encode(self, data: object) -> str: - pass - - def iterencode(self, data: object) -> Iterator[str]: - pass - - def __init__(self, *args: Any, **kwargs: Any) -> None: - pass - - -class JsonLibrary(Protocol): # pragma: no cover - @property - def JSONEncoder(self) -> Type[Encoder]: - pass - - # Declare these in the module scope, but they get configured in # set_json_library. -_canonical_encoder: Encoder = None # type: ignore[assignment] -_pretty_encoder: Encoder = None # type: ignore[assignment] - - -def set_json_library(json_lib: JsonLibrary) -> None: - """ - Set the underlying JSON library that canonicaljson uses to json_lib. - - Params: - json_lib: The module to use for JSON encoding. Must have a - `JSONEncoder` property. - """ - global _canonical_encoder - _canonical_encoder = json_lib.JSONEncoder( - ensure_ascii=False, - allow_nan=False, - separators=(",", ":"), - sort_keys=True, - default=_preprocess_for_serialisation, - ) - - global _pretty_encoder - _pretty_encoder = json_lib.JSONEncoder( - ensure_ascii=False, - allow_nan=False, - indent=4, - sort_keys=True, - default=_preprocess_for_serialisation, - ) +_canonical_encoder = json.JSONEncoder( + ensure_ascii=False, + allow_nan=False, + separators=(",", ":"), + sort_keys=True, + default=_preprocess_for_serialisation, +) +_pretty_encoder = json.JSONEncoder( + ensure_ascii=False, + allow_nan=False, + indent=4, + sort_keys=True, + default=_preprocess_for_serialisation, +) def encode_canonical_json(data: object) -> bytes: @@ -152,20 +115,3 @@ def iterencode_pretty_printed_json(data: object) -> Generator[bytes, None, None] """ for chunk in _pretty_encoder.iterencode(data): yield chunk.encode("utf-8") - - -if platform.python_implementation() == "PyPy": # pragma: no cover - # pypy ships with an optimised JSON encoder/decoder that is faster than - # simplejson's C extension. - import json -else: # pragma: no cover - # using simplejson rather than regular json on CPython for backwards - # compatibility (simplejson on Python 3.5 handles parsing of bytes while - # the standard library json does not). - # - # Note that it seems performance is on par or better using json from the - # standard library as of Python 3.7. - import simplejson as json # type: ignore[no-redef] - -# Set the JSON library to the backwards compatible version. -set_json_library(json) diff --git a/tests/test_canonicaljson.py b/tests/test_canonicaljson.py index 80e935f..8d76f5a 100644 --- a/tests/test_canonicaljson.py +++ b/tests/test_canonicaljson.py @@ -22,12 +22,10 @@ encode_pretty_printed_json, iterencode_canonical_json, iterencode_pretty_printed_json, - set_json_library, register_preserialisation_callback, ) import unittest -from unittest import mock class TestCanonicalJson(unittest.TestCase): @@ -140,19 +138,6 @@ def test_invalid_float_values(self) -> None: with self.assertRaises(ValueError): encode_pretty_printed_json(nan) - def test_set_json(self) -> None: - """Ensure that changing the underlying JSON implementation works.""" - mock_json = mock.Mock(spec=["JSONEncoder"]) - mock_json.JSONEncoder.return_value.encode.return_value = "sentinel" - try: - set_json_library(mock_json) - self.assertEqual(encode_canonical_json({}), b"sentinel") - finally: - # Reset the JSON library to whatever was originally set. - from canonicaljson import json # type: ignore[attr-defined] - - set_json_library(json) - def test_encode_unknown_class_raises(self) -> None: class C: pass diff --git a/tox.ini b/tox.ini index 63b9d58..0c166ab 100644 --- a/tox.ini +++ b/tox.ini @@ -33,7 +33,5 @@ commands = python -m black --check --diff src tests [testenv:mypy] deps = mypy==1.0 - types-simplejson==3.17.5 types-setuptools==57.4.14 commands = mypy src tests - From 57937f7c5a319437c9138c09bd26934854d404cd Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 14 Mar 2023 15:04:25 -0400 Subject: [PATCH 7/7] Fix comment. --- src/canonicaljson/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/canonicaljson/__init__.py b/src/canonicaljson/__init__.py index 5eb993a..3861434 100644 --- a/src/canonicaljson/__init__.py +++ b/src/canonicaljson/__init__.py @@ -58,8 +58,7 @@ def register_preserialisation_callback( _preprocess_for_serialisation.register(data_type, callback) -# Declare these in the module scope, but they get configured in -# set_json_library. +# Declare these once for re-use. _canonical_encoder = json.JSONEncoder( ensure_ascii=False, allow_nan=False,