From 2f9a8ac3cf7dbcc2042e4eb2df5766b5d49c2c20 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 13 Mar 2025 15:56:34 +0100 Subject: [PATCH 01/27] fix pylint warnings --- src/databricks/labs/blueprint/paths.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/blueprint/paths.py b/src/databricks/labs/blueprint/paths.py index 6923b64..5197f9f 100644 --- a/src/databricks/labs/blueprint/paths.py +++ b/src/databricks/labs/blueprint/paths.py @@ -151,6 +151,7 @@ def __new__(cls, *args, **kwargs): # Force all initialisation to go via __init__() irrespective of the (Python-specific) base version. return object.__new__(cls) + # pylint: disable=super-init-not-called def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None: # We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the # implementation of the public API. @@ -398,6 +399,7 @@ def with_suffix(self: P, suffix: str) -> P: raise ValueError(msg) return self.with_name(stem + suffix) + # pylint: disable=arguments-differ def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P: normalized = self.with_segments(*other) if self.anchor != normalized.anchor: From 8e62ca4515d7249963497b26dd7f6504d29aca5b Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 13 Mar 2025 15:57:19 +0100 Subject: [PATCH 02/27] add support for marshalling 'object' and 'any' --- src/databricks/labs/blueprint/installation.py | 68 +++++++-- tests/unit/test_installation.py | 144 ++++++++++++++++++ 2 files changed, 200 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 5d028a2..ea4f954 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -469,6 +469,7 @@ def _get_list_type_ref(inst: T) -> type[list[T]]: item_type = type(from_list[0]) # type: ignore[misc] return list[item_type] # type: ignore[valid-type] + # pylint: disable=too-complex def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: """The `_marshal` method is a private method that is used to serialize an object of type `type_ref` to a dictionary. This method is called by the `save` method.""" @@ -481,7 +482,11 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo if dataclasses.is_dataclass(type_ref): return self._marshal_dataclass(type_ref, path, inst) if type_ref == list: - return self._marshal_list(type_ref, path, inst) + return self._marshal_raw_list(path, inst) + if type_ref == dict: + return self._marshal_raw_dict(path, inst) + if type_ref in (object, any): + return self._marshal(type(inst), path, inst) if isinstance(type_ref, enum.EnumMeta): return self._marshal_enum(inst) if type_ref == types.NoneType: @@ -523,8 +528,8 @@ def _marshal_generic(self, type_ref: type, path: list[str], inst: Any) -> tuple[ if not type_args: raise SerdeError(f"Missing type arguments: {type_args}") if len(type_args) == 2: - return self._marshal_dict(type_args[1], path, inst) - return self._marshal_list(type_args[0], path, inst) + return self._marshal_generic_dict(type_args[1], path, inst) + return self._marshal_generic_list(type_args[0], path, inst) @staticmethod def _marshal_generic_alias(type_ref, inst): @@ -534,21 +539,34 @@ def _marshal_generic_alias(type_ref, inst): return None, False return inst, isinstance(inst, type_ref.__origin__) # type: ignore[attr-defined] - def _marshal_list(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: - """The `_marshal_list` method is a private method that is used to serialize an object of type `type_ref` to - a dictionary. This method is called by the `save` method.""" + def _marshal_generic_list(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: + """The `_marshal_generic_list` method is a private method that is used to serialize an object of type list[type_ref] to + an array. This method is called by the `save` method.""" as_list = [] if not isinstance(inst, list): return None, False for i, v in enumerate(inst): value, ok = self._marshal(type_ref, [*path, f"{i}"], v) if not ok: - raise SerdeError(self._explain_why(type_ref, [*path, f"{i}"], v)) + raise SerdeError(self._explain_why(type(v), [*path, f"{i}"], v)) as_list.append(value) return as_list, True - def _marshal_dict(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: - """The `_marshal_dict` method is a private method that is used to serialize an object of type `type_ref` to + def _marshal_raw_list(self, path: list[str], inst: Any) -> tuple[Any, bool]: + """The `_marshal_raw_list` method is a private method that is used to serialize an object of type list to + an array. This method is called by the `save` method.""" + as_list = [] + if not isinstance(inst, list): + return None, False + for i, v in enumerate(inst): + value, ok = self._marshal(type(v), [*path, f"{i}"], v) + if not ok: + raise SerdeError(self._explain_why(type(v), [*path, f"{i}"], v)) + as_list.append(value) + return as_list, True + + def _marshal_generic_dict(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: + """The `_marshal_generic_dict` method is a private method that is used to serialize an object of type dict[str, type_ref] to a dictionary. This method is called by the `save` method.""" if not isinstance(inst, dict): return None, False @@ -556,7 +574,19 @@ def _marshal_dict(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any for k, v in inst.items(): as_dict[k], ok = self._marshal(type_ref, [*path, k], v) if not ok: - raise SerdeError(self._explain_why(type_ref, [*path, k], v)) + raise SerdeError(self._explain_why(type(v), [*path, k], v)) + return as_dict, True + + def _marshal_raw_dict(self, path: list[str], inst: Any) -> tuple[Any, bool]: + """The `_marshal_raw_dict` method is a private method that is used to serialize an object of type dict to + a dictionary. This method is called by the `save` method.""" + if not isinstance(inst, dict): + return None, False + as_dict = {} + for k, v in inst.items(): + as_dict[k], ok = self._marshal(type(v), [*path, k], v) + if not ok: + raise SerdeError(self._explain_why(type(v), [*path, k], v)) return as_dict, True def _marshal_dataclass(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, bool]: @@ -646,8 +676,22 @@ def _unmarshal_generic_types(cls, type_ref, path, inst): return cls._unmarshal_union(inst, path, type_ref) if isinstance(type_ref, (_GenericAlias, types.GenericAlias)): return cls._unmarshal_generic(inst, path, type_ref) + if type_ref in (object, any): + return cls._unmarshal_object(inst, path) raise SerdeError(f'{".".join(path)}: unknown: {type_ref}: {inst}') + @classmethod + def _unmarshal_object(cls, inst, path): + if inst is None: + return None + if isinstance(inst, (bool, int, float, str)): + return cls._unmarshal_primitive(inst, type(inst)) + if isinstance(inst, list): + return cls._unmarshal_list(inst, path, object) + if isinstance(inst, dict): + return cls._unmarshal_dict(inst, path, object) + raise SerdeError(f'{".".join(path)}: unknown: {type(inst)}: {inst}') + @classmethod def _unmarshal_dataclass(cls, inst, path, type_ref): """The `_unmarshal_dataclass` method is a private method that is used to deserialize a dictionary to an object @@ -707,13 +751,13 @@ def _unmarshal_generic(cls, inst, path, type_ref): @classmethod def _unmarshal_list(cls, inst, path, hint): - """The `_unmarshal_list` method is a private method that is used to deserialize a dictionary to an object + """The `_unmarshal_list` method is a private method that is used to deserialize an array to a list of type `type_ref`. This method is called by the `load` method.""" if inst is None: return None as_list = [] for i, v in enumerate(inst): - as_list.append(cls._unmarshal(v, [*path, f"{i}"], hint)) + as_list.append(cls._unmarshal(v, [*path, f"{i}"], hint or type(v))) return as_list @classmethod diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index 6b67219..4884d9e 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -451,3 +451,147 @@ def test_assert_file_uploaded(): installation = MockInstallation() installation.upload("foo", b"bar") installation.assert_file_uploaded("foo", b"bar") + + +def test_generic_dict_str(): + @dataclass + class SampleClass: + field: dict[str, str] + + installation = MockInstallation() + saved = SampleClass(field={"a": "b", "b": "c"}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_dict_int(): + @dataclass + class SampleClass: + field: dict[str, int] + + installation = MockInstallation() + saved = SampleClass(field={"a": 1, "b": 1}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_dict_float(): + @dataclass + class SampleClass: + field: dict[str, float] + + installation = MockInstallation() + saved = SampleClass(field={"a": 1.1, "b": 1.2}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_dict_list(): + @dataclass + class SampleClass: + field: dict[str, list[str]] + + installation = MockInstallation() + saved = SampleClass(field={"a": ["x", "y"], "b": []}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_dict_object(): + @dataclass + class SampleClass: + field: dict[str, object] + + installation = MockInstallation() + saved = SampleClass(field={"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_dict_any(): + @dataclass + class SampleClass: + field: dict[str, any] + + installation = MockInstallation() + saved = SampleClass(field={"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_str(): + @dataclass + class SampleClass: + field: list[str] + + installation = MockInstallation() + saved = SampleClass(field=["a", "b"]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_int(): + @dataclass + class SampleClass: + field: list[int] + + installation = MockInstallation() + saved = SampleClass(field=[1, 2, 3]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_float(): + @dataclass + class SampleClass: + field: list[float] + + installation = MockInstallation() + saved = SampleClass(field=[1.1, 1.2]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_list(): + @dataclass + class SampleClass: + field: list[list[str]] + + installation = MockInstallation() + saved = SampleClass(field=[["x", "y"], []]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_object(): + @dataclass + class SampleClass: + field: list[object] + + installation = MockInstallation() + saved = SampleClass(field=[["x", "y"], [], 3, True, {"a": "b"}]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +def test_generic_list_any(): + @dataclass + class SampleClass: + field: list[any] + + installation = MockInstallation() + saved = SampleClass(field=[["x", "y"], [], 3, True, {"a": "b"}]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved From 15952ca70ba8602bd190242790d701404c6cd9d3 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 13 Mar 2025 16:44:09 +0100 Subject: [PATCH 03/27] also support 'Any' --- src/databricks/labs/blueprint/installation.py | 4 ++-- tests/unit/test_installation.py | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index ea4f954..a99b411 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -485,7 +485,7 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo return self._marshal_raw_list(path, inst) if type_ref == dict: return self._marshal_raw_dict(path, inst) - if type_ref in (object, any): + if type_ref in (object, any, Any): return self._marshal(type(inst), path, inst) if isinstance(type_ref, enum.EnumMeta): return self._marshal_enum(inst) @@ -676,7 +676,7 @@ def _unmarshal_generic_types(cls, type_ref, path, inst): return cls._unmarshal_union(inst, path, type_ref) if isinstance(type_ref, (_GenericAlias, types.GenericAlias)): return cls._unmarshal_generic(inst, path, type_ref) - if type_ref in (object, any): + if type_ref in (object, any, Any): return cls._unmarshal_object(inst, path) raise SerdeError(f'{".".join(path)}: unknown: {type_ref}: {inst}') diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index 4884d9e..5f42e97 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -525,6 +525,18 @@ class SampleClass: assert loaded == saved +def test_generic_dict_any2(): + @dataclass + class SampleClass: + field: dict[str, typing.Any] + + installation = MockInstallation() + saved = SampleClass(field={"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + def test_generic_list_str(): @dataclass class SampleClass: @@ -595,3 +607,15 @@ class SampleClass: installation.save(saved, filename="backups/SampleClass.json") loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved + + +def test_generic_list_any2(): + @dataclass + class SampleClass: + field: list[typing.Any] + + installation = MockInstallation() + saved = SampleClass(field=[["x", "y"], [], 3, True, {"a": "b"}]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved From a371465f2b8e7dccb71a96aba89712dff199ccc6 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Fri, 14 Mar 2025 10:35:29 +0100 Subject: [PATCH 04/27] lowercase 'any' is not a type --- src/databricks/labs/blueprint/installation.py | 4 ++-- tests/unit/test_installation.py | 24 ------------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index a99b411..6280380 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -485,7 +485,7 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo return self._marshal_raw_list(path, inst) if type_ref == dict: return self._marshal_raw_dict(path, inst) - if type_ref in (object, any, Any): + if type_ref in (object, Any): return self._marshal(type(inst), path, inst) if isinstance(type_ref, enum.EnumMeta): return self._marshal_enum(inst) @@ -676,7 +676,7 @@ def _unmarshal_generic_types(cls, type_ref, path, inst): return cls._unmarshal_union(inst, path, type_ref) if isinstance(type_ref, (_GenericAlias, types.GenericAlias)): return cls._unmarshal_generic(inst, path, type_ref) - if type_ref in (object, any, Any): + if type_ref in (object, Any): return cls._unmarshal_object(inst, path) raise SerdeError(f'{".".join(path)}: unknown: {type_ref}: {inst}') diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index 5f42e97..0359f18 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -514,18 +514,6 @@ class SampleClass: def test_generic_dict_any(): - @dataclass - class SampleClass: - field: dict[str, any] - - installation = MockInstallation() - saved = SampleClass(field={"a": ["x", "y"], "b": [], "c": 3, "d": True, "e": {"a": "b"}}) - installation.save(saved, filename="backups/SampleClass.json") - loaded = installation.load(SampleClass, filename="backups/SampleClass.json") - assert loaded == saved - - -def test_generic_dict_any2(): @dataclass class SampleClass: field: dict[str, typing.Any] @@ -598,18 +586,6 @@ class SampleClass: def test_generic_list_any(): - @dataclass - class SampleClass: - field: list[any] - - installation = MockInstallation() - saved = SampleClass(field=[["x", "y"], [], 3, True, {"a": "b"}]) - installation.save(saved, filename="backups/SampleClass.json") - loaded = installation.load(SampleClass, filename="backups/SampleClass.json") - assert loaded == saved - - -def test_generic_list_any2(): @dataclass class SampleClass: field: list[typing.Any] From a7f0eb55df52803bdb5a9d3ce432c8a424d65ef6 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 15 May 2025 18:18:24 +0200 Subject: [PATCH 05/27] fix issue where genuine strings were converted to bool within a union --- src/databricks/labs/blueprint/installation.py | 13 ++++++++++--- tests/unit/test_installation.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 6280380..b3be198 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -777,10 +777,17 @@ def _unmarshal_dict(cls, inst, path, type_ref): def _unmarshal_primitive(cls, inst, type_ref): """The `_unmarshal_primitive` method is a private method that is used to deserialize a dictionary to an object of type `type_ref`. This method is called by the `load` method.""" - if not inst: + if inst is None: + return None + if type(inst)==type_ref: return inst - # convert from str to int if necessary - converted = type_ref(inst) # type: ignore[call-arg] + converted = inst + # convert from str + if isinstance(inst, str): + if type_ref in (int, float): + converted = type_ref(inst) # type: ignore[call-arg] + elif type_ref == bool: + converted = inst.lower()=="true" return converted @staticmethod diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index 0359f18..e91b7e3 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -595,3 +595,16 @@ class SampleClass: installation.save(saved, filename="backups/SampleClass.json") loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved + +def test_bool_in_union(): + @dataclass + class SampleClass: + field: dict[str, bool | str] + + installation = MockInstallation() + saved = SampleClass(field={"a": "b"}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + From 43e75246d277859dc3f5108b8a83c302ee527dc2 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 15 May 2025 18:28:45 +0200 Subject: [PATCH 06/27] fix unmarshalling of complex unions --- src/databricks/labs/blueprint/installation.py | 16 ++++++++++++---- tests/unit/test_installation.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index b3be198..fec053e 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -726,9 +726,14 @@ def _unmarshal_union(cls, inst, path, type_ref): """The `_unmarshal_union` method is a private method that is used to deserialize a dictionary to an object of type `type_ref`. This method is called by the `load` method.""" for variant in get_args(type_ref): - value = cls._unmarshal(inst, path, variant) - if value: - return value + if variant == type(None) and inst is None: + return None + try: + value = cls._unmarshal(inst, path, variant) + if value is not None: + return value + except: + pass return None @classmethod @@ -787,7 +792,10 @@ def _unmarshal_primitive(cls, inst, type_ref): if type_ref in (int, float): converted = type_ref(inst) # type: ignore[call-arg] elif type_ref == bool: - converted = inst.lower()=="true" + if inst.lower() == "true": + converted = True + elif inst.lower() == "false": + converted = False return converted @staticmethod diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index e91b7e3..aa45604 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -608,3 +608,17 @@ class SampleClass: assert loaded == saved +JsonType: typing.TypeAlias = None | bool | int | float | str | list["JsonType"] | dict[str, "JsonType"] + +def test_complex_union(): + + @dataclass + class SampleClass: + field: dict[str, JsonType] + + installation = MockInstallation() + saved = SampleClass(field={"a": "b"}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + From af2107ffd6a51761acd48c94176cdde8f448e341 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Thu, 15 May 2025 23:06:11 +0200 Subject: [PATCH 07/27] fix crash when unmarshalling complex unions --- src/databricks/labs/blueprint/installation.py | 6 ++++-- tests/unit/test_installation.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index fec053e..75afc8f 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -755,14 +755,16 @@ def _unmarshal_generic(cls, inst, path, type_ref): return cls._unmarshal_list(inst, path, type_args[0]) @classmethod - def _unmarshal_list(cls, inst, path, hint): + def _unmarshal_list(cls, inst, path, type_ref): """The `_unmarshal_list` method is a private method that is used to deserialize an array to a list of type `type_ref`. This method is called by the `load` method.""" if inst is None: return None + if not isinstance(inst, list): + raise SerdeError(cls._explain_why(type_ref, path, inst)) as_list = [] for i, v in enumerate(inst): - as_list.append(cls._unmarshal(v, [*path, f"{i}"], hint or type(v))) + as_list.append(cls._unmarshal(v, [*path, f"{i}"], type_ref or type(v))) return as_list @classmethod diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index aa45604..4d6ea8d 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -622,3 +622,16 @@ class SampleClass: loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved +JsonType2: typing.TypeAlias = dict[str, "JsonType2"] | list["JsonType2"] | str | float | int | bool | None + +def test_complex_union2(): + + @dataclass + class SampleClass: + field: dict[str, JsonType2] + + installation = MockInstallation() + saved = SampleClass(field={"a": "b"}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved \ No newline at end of file From d9d2e7e280d8480f0a27d8293d711f838293bb57 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:25:39 +0200 Subject: [PATCH 08/27] add tests --- tests/unit/test_marshalling.py | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/unit/test_marshalling.py diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling.py new file mode 100644 index 0000000..df34441 --- /dev/null +++ b/tests/unit/test_marshalling.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass + +import pytest +from dataclasses import dataclass + +from databricks.labs.blueprint.installation import Installation, MockInstallation + + +@pytest.mark.parametrize("allow_weak_types", [ True, False ]) +def test_weak_typing_with_list(allow_weak_types) -> None: + + # this example corresponds to a frequent Python coding pattern + # where users don't specify the item type of a list + + @dataclass + class SampleClass: + field: list + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = SampleClass(field=["a", 1, True]) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved + + +@pytest.mark.parametrize("allow_weak_types", [True, False]) +def test_weak_typing_with_dict(allow_weak_types) -> None: + # this example corresponds to a frequent Python coding pattern + # where users don't specify the key and item types of a dict + + @dataclass + class SampleClass: + field: dict + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = SampleClass(field={"x": "a", "y": 1, "z": True}) + installation.save(saved, filename="backups/SampleClass.json") + loaded = installation.load(SampleClass, filename="backups/SampleClass.json") + assert loaded == saved From 9df6876e19b2bcdec8d643041cd1beca12cd10f4 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:27:23 +0200 Subject: [PATCH 09/27] make weak types support optional for testing --- src/databricks/labs/blueprint/installation.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 75afc8f..48b3b99 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -67,6 +67,7 @@ class Installation: T = TypeVar("T") _PRIMITIVES = (int, bool, float, str) + allow_weak_types = True def __init__(self, ws: WorkspaceClient, product: str, *, install_folder: str | None = None): """The `Installation` class constructor creates an `Installation` object for the given product in @@ -483,10 +484,11 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo return self._marshal_dataclass(type_ref, path, inst) if type_ref == list: return self._marshal_raw_list(path, inst) - if type_ref == dict: - return self._marshal_raw_dict(path, inst) - if type_ref in (object, Any): - return self._marshal(type(inst), path, inst) + if self.allow_weak_types: + if type_ref == dict: + return self._marshal_raw_dict(path, inst) + if type_ref in (object, Any): + return self._marshal(type(inst), path, inst) if isinstance(type_ref, enum.EnumMeta): return self._marshal_enum(inst) if type_ref == types.NoneType: @@ -646,6 +648,8 @@ def from_dict(cls, raw: dict): def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None: """The `_unmarshal` method is a private method that is used to deserialize a dictionary to an object of type `type_ref`. This method is called by the `load` method.""" + if type_ref == types.NoneType: + return None if dataclasses.is_dataclass(type_ref): return cls._unmarshal_dataclass(inst, path, type_ref) if isinstance(type_ref, enum.EnumMeta): @@ -676,7 +680,7 @@ def _unmarshal_generic_types(cls, type_ref, path, inst): return cls._unmarshal_union(inst, path, type_ref) if isinstance(type_ref, (_GenericAlias, types.GenericAlias)): return cls._unmarshal_generic(inst, path, type_ref) - if type_ref in (object, Any): + if cls.allow_weak_types and type_ref in (object, Any): return cls._unmarshal_object(inst, path) raise SerdeError(f'{".".join(path)}: unknown: {type_ref}: {inst}') @@ -686,10 +690,11 @@ def _unmarshal_object(cls, inst, path): return None if isinstance(inst, (bool, int, float, str)): return cls._unmarshal_primitive(inst, type(inst)) - if isinstance(inst, list): - return cls._unmarshal_list(inst, path, object) - if isinstance(inst, dict): - return cls._unmarshal_dict(inst, path, object) + if cls.allow_weak_types: + if isinstance(inst, list): + return cls._unmarshal_list(inst, path, object) + if isinstance(inst, dict): + return cls._unmarshal_dict(inst, path, object) raise SerdeError(f'{".".join(path)}: unknown: {type(inst)}: {inst}') @classmethod From 26aa5383a18bb3bea4a1d1a7631390b29fa96ddb Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:27:41 +0200 Subject: [PATCH 10/27] fix unmarshalling of raw list --- src/databricks/labs/blueprint/installation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 48b3b99..ab80a20 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -658,12 +658,12 @@ def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None: return type_ref(inst) if type_ref in cls._PRIMITIVES: return cls._unmarshal_primitive(inst, type_ref) + if type_ref == list: + return cls._unmarshal_list(inst, path, Any) if type_ref == databricks.sdk.core.Config: if not inst: inst = {} return databricks.sdk.core.Config(**inst) # type: ignore[return-value] - if type_ref == types.NoneType: - return None if isinstance(type_ref, cls._FromDict): return type_ref.from_dict(inst) return cls._unmarshal_generic_types(type_ref, path, inst) From 10dd453691ee12b0c10c0677832722c84086612e Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:49:16 +0200 Subject: [PATCH 11/27] fix unmarshalling of raw dict --- src/databricks/labs/blueprint/installation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index ab80a20..d893db8 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -660,6 +660,8 @@ def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None: return cls._unmarshal_primitive(inst, type_ref) if type_ref == list: return cls._unmarshal_list(inst, path, Any) + if type_ref == dict: + return cls._unmarshal_dict(inst, path, Any) if type_ref == databricks.sdk.core.Config: if not inst: inst = {} From 5eab803f2ffb5664b22381e7d07113fb42de993b Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:49:27 +0200 Subject: [PATCH 12/27] more scenarios --- tests/unit/test_marshalling.py | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling.py index df34441..28f9501 100644 --- a/tests/unit/test_marshalling.py +++ b/tests/unit/test_marshalling.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from typing import Any import pytest from dataclasses import dataclass @@ -39,3 +40,82 @@ class SampleClass: installation.save(saved, filename="backups/SampleClass.json") loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved + +@pytest.mark.parametrize("allow_weak_types", [ True, False ]) +def test_progressive_typing_with_list(allow_weak_types) -> None: + + # this example corresponds to a frequent Python coding pattern + # where users only specify the item type of a list once they need it + + @dataclass + class SampleClassV1: + field: list + + @dataclass + class SampleClassV2: + field: list[str] + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = SampleClassV1(field=["a", "b", "c"]) + installation.save(saved, filename="backups/SampleClass.json") + # problem: can't directly use untyped item values + # loaded_v1 = installation.load(SampleClassV1, filename="backups/SampleClass.json") + # stuff = loaded_v1[0][1:2] + # so they've stored weakly typed data, and they need to read it as typed data + loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") + assert loaded == SampleClassV2(field=saved.field) + + +@pytest.mark.parametrize("allow_weak_types", [ True, False ]) +def test_progressive_typing_with_dict(allow_weak_types) -> None: + + # this example corresponds to a frequent Python coding pattern + # where users only specify the item type of a dict once they need it + + @dataclass + class SampleClassV1: + field: dict + + @dataclass + class SampleClassV2: + field: dict[str, str] + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = SampleClassV1(field={"x": "abc", "y": "def", "z": "ghi"}) + installation.save(saved, filename="backups/SampleClass.json") + # problem: can't directly use untyped item values + # loaded_v1 = installation.load(SampleClassV1, filename="backups/SampleClass.json") + # stuff = loaded_v1["x"][1:2] + # so they've stored weakly typed data, and they need to read it as typed data + loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") + assert loaded == SampleClassV2(field=saved.field) + +@pytest.mark.parametrize("allow_weak_types", [True, False]) +def test_lost_code_with_list(allow_weak_types) -> None: + # this example corresponds to a scenario where data was stored + # using code that is no longer available + + @dataclass + class LostSampleClass: + field: list[str] + + # we don't know the type of 'field' + # so we'll use code to restore the data + @dataclass + class RecoverySampleClass: + field: object + + @dataclass + class SampleClass: + field: list[str] + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = LostSampleClass(field=["a", "b", "c"]) + installation.save(saved, filename="backups/SampleClass.json") + # problem: we don't know how SampleClass.json was stored + # so we're loading the data as weakly typed + loaded = installation.load(RecoverySampleClass, filename="backups/SampleClass.json") + assert SampleClass(field=loaded.field) == SampleClass(field=saved.field) From d5a5cbd9c681bade3ecd9efdc60bf5db69470f0f Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:57:43 +0200 Subject: [PATCH 13/27] more scenarios --- tests/unit/test_marshalling.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling.py index 28f9501..54d6a08 100644 --- a/tests/unit/test_marshalling.py +++ b/tests/unit/test_marshalling.py @@ -92,6 +92,31 @@ class SampleClassV2: loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") assert loaded == SampleClassV2(field=saved.field) +@pytest.mark.parametrize("allow_weak_types", [ True, False ]) +def test_type_migration(allow_weak_types) -> None: + + # this example corresponds to a frequent Python coding scenario + # where users change their mind about a type + + @dataclass + class SampleClassV1: + field: list[str] + + @dataclass + class SampleClassV2: + field: list[int | None] + + Installation.allow_weak_types = allow_weak_types + installation = MockInstallation() + saved = SampleClassV1(field=["1", "2", ""]) + installation.save(saved, filename="backups/SampleClass.json") + # problem: can't directly convert an item value + # loaded_v1 = installation.load(SampleClassV2, filename="backups/SampleClass.json") + # so they've stored strings, and they need to read ints + converted = SampleClassV2(field=[(int(val) if val else None) for val in saved.field]) + loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") + assert loaded == converted + @pytest.mark.parametrize("allow_weak_types", [True, False]) def test_lost_code_with_list(allow_weak_types) -> None: # this example corresponds to a scenario where data was stored From d86e651c00046295fd3a5c60ca35583c66b8193a Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:58:58 +0200 Subject: [PATCH 14/27] fix unmarshalling of raw list --- src/databricks/labs/blueprint/installation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 75afc8f..553ac77 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -654,12 +654,12 @@ def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None: return type_ref(inst) if type_ref in cls._PRIMITIVES: return cls._unmarshal_primitive(inst, type_ref) + if type_ref == list: + return cls._unmarshal_list(inst, path, Any) if type_ref == databricks.sdk.core.Config: if not inst: inst = {} return databricks.sdk.core.Config(**inst) # type: ignore[return-value] - if type_ref == types.NoneType: - return None if isinstance(type_ref, cls._FromDict): return type_ref.from_dict(inst) return cls._unmarshal_generic_types(type_ref, path, inst) From efd602875e847ddb48ced39e11ed4cb467e36904 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 16:49:16 +0200 Subject: [PATCH 15/27] fix unmarshalling of raw dict --- src/databricks/labs/blueprint/installation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 553ac77..a858cd0 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -656,6 +656,8 @@ def _unmarshal(cls, inst: Any, path: list[str], type_ref: type[T]) -> T | None: return cls._unmarshal_primitive(inst, type_ref) if type_ref == list: return cls._unmarshal_list(inst, path, Any) + if type_ref == dict: + return cls._unmarshal_dict(inst, path, Any) if type_ref == databricks.sdk.core.Config: if not inst: inst = {} From 5512c276ffbe3635651237bcea0bb68af522559d Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:39:49 +0200 Subject: [PATCH 16/27] more flags and scenarios --- src/databricks/labs/blueprint/installation.py | 10 ++- tests/unit/test_marshalling.py | 85 +++++++++++++------ 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index d893db8..f7afa57 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -67,6 +67,7 @@ class Installation: T = TypeVar("T") _PRIMITIVES = (int, bool, float, str) + allow_raw_types = True allow_weak_types = True def __init__(self, ws: WorkspaceClient, product: str, *, install_folder: str | None = None): @@ -482,11 +483,12 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo return inst.as_dict(), True if dataclasses.is_dataclass(type_ref): return self._marshal_dataclass(type_ref, path, inst) - if type_ref == list: - return self._marshal_raw_list(path, inst) - if self.allow_weak_types: + if self.allow_raw_types: + if type_ref == list: + return self._marshal_raw_list(path, inst) if type_ref == dict: return self._marshal_raw_dict(path, inst) + if self.allow_weak_types: if type_ref in (object, Any): return self._marshal(type(inst), path, inst) if isinstance(type_ref, enum.EnumMeta): @@ -692,7 +694,7 @@ def _unmarshal_object(cls, inst, path): return None if isinstance(inst, (bool, int, float, str)): return cls._unmarshal_primitive(inst, type(inst)) - if cls.allow_weak_types: + if cls.allow_raw_types: if isinstance(inst, list): return cls._unmarshal_list(inst, path, object) if isinstance(inst, dict): diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling.py index 54d6a08..9832b59 100644 --- a/tests/unit/test_marshalling.py +++ b/tests/unit/test_marshalling.py @@ -1,15 +1,30 @@ from dataclasses import dataclass +from enum import Enum from typing import Any import pytest -from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation, MockInstallation - -@pytest.mark.parametrize("allow_weak_types", [ True, False ]) -def test_weak_typing_with_list(allow_weak_types) -> None: - +class TypeSupport(Enum): + STRICT = "STRICT" + RAW_TYPES = "RAW" + WEAK_TYPES = "WEAK" + +def set_type_support(type_support: TypeSupport): + if type_support == TypeSupport.STRICT: + Installation.allow_raw_types = False + Installation.allow_weak_types = False + elif type_support == TypeSupport.RAW_TYPES: + Installation.allow_raw_types = True + Installation.allow_weak_types = False + if type_support == TypeSupport.WEAK_TYPES: + Installation.allow_raw_types = True + Installation.allow_weak_types = True + +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_weak_typing_with_list(type_support) -> None: + set_type_support(type_support) # this example corresponds to a frequent Python coding pattern # where users don't specify the item type of a list @@ -17,7 +32,6 @@ def test_weak_typing_with_list(allow_weak_types) -> None: class SampleClass: field: list - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() saved = SampleClass(field=["a", 1, True]) installation.save(saved, filename="backups/SampleClass.json") @@ -25,8 +39,9 @@ class SampleClass: assert loaded == saved -@pytest.mark.parametrize("allow_weak_types", [True, False]) -def test_weak_typing_with_dict(allow_weak_types) -> None: +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_weak_typing_with_dict(type_support) -> None: + set_type_support(type_support) # this example corresponds to a frequent Python coding pattern # where users don't specify the key and item types of a dict @@ -34,15 +49,15 @@ def test_weak_typing_with_dict(allow_weak_types) -> None: class SampleClass: field: dict - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() saved = SampleClass(field={"x": "a", "y": 1, "z": True}) installation.save(saved, filename="backups/SampleClass.json") loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved -@pytest.mark.parametrize("allow_weak_types", [ True, False ]) -def test_progressive_typing_with_list(allow_weak_types) -> None: +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_progressive_typing_with_list(type_support) -> None: + set_type_support(type_support) # this example corresponds to a frequent Python coding pattern # where users only specify the item type of a list once they need it @@ -55,7 +70,6 @@ class SampleClassV1: class SampleClassV2: field: list[str] - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() saved = SampleClassV1(field=["a", "b", "c"]) installation.save(saved, filename="backups/SampleClass.json") @@ -67,8 +81,9 @@ class SampleClassV2: assert loaded == SampleClassV2(field=saved.field) -@pytest.mark.parametrize("allow_weak_types", [ True, False ]) -def test_progressive_typing_with_dict(allow_weak_types) -> None: +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_progressive_typing_with_dict(type_support) -> None: + set_type_support(type_support) # this example corresponds to a frequent Python coding pattern # where users only specify the item type of a dict once they need it @@ -81,7 +96,6 @@ class SampleClassV1: class SampleClassV2: field: dict[str, str] - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() saved = SampleClassV1(field={"x": "abc", "y": "def", "z": "ghi"}) installation.save(saved, filename="backups/SampleClass.json") @@ -92,8 +106,9 @@ class SampleClassV2: loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") assert loaded == SampleClassV2(field=saved.field) -@pytest.mark.parametrize("allow_weak_types", [ True, False ]) -def test_type_migration(allow_weak_types) -> None: +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_type_migration(type_support) -> None: + set_type_support(type_support) # this example corresponds to a frequent Python coding scenario # where users change their mind about a type @@ -106,7 +121,6 @@ class SampleClassV1: class SampleClassV2: field: list[int | None] - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() saved = SampleClassV1(field=["1", "2", ""]) installation.save(saved, filename="backups/SampleClass.json") @@ -117,8 +131,9 @@ class SampleClassV2: loaded = installation.load(SampleClassV2, filename="backups/SampleClass.json") assert loaded == converted -@pytest.mark.parametrize("allow_weak_types", [True, False]) -def test_lost_code_with_list(allow_weak_types) -> None: +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_lost_code_with_list(type_support) -> None: + set_type_support(type_support) # this example corresponds to a scenario where data was stored # using code that is no longer available @@ -132,15 +147,35 @@ class LostSampleClass: class RecoverySampleClass: field: object + installation = MockInstallation() + saved = LostSampleClass(field=["a", "b", "c"]) + installation.save(saved, filename="backups/SampleClass.json") + # problem: we don't know how SampleClass.json was stored + # so we're loading the data as weakly typed + loaded = installation.load(RecoverySampleClass, filename="backups/SampleClass.json") + assert loaded.field == saved.field + + +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_lost_code_with_any(type_support) -> None: + set_type_support(type_support) + # this example corresponds to a scenario where data was stored + # using code that is no longer available + @dataclass - class SampleClass: - field: list[str] + class LostSampleClass: + field: dict[str, str | int | None] + + # we don't know the type of 'field' + # so we'll use code to restore the data + @dataclass + class RecoverySampleClass: + field: Any - Installation.allow_weak_types = allow_weak_types installation = MockInstallation() - saved = LostSampleClass(field=["a", "b", "c"]) + saved = LostSampleClass(field={"a": "b", "b": 2, "c": None}) installation.save(saved, filename="backups/SampleClass.json") # problem: we don't know how SampleClass.json was stored # so we're loading the data as weakly typed loaded = installation.load(RecoverySampleClass, filename="backups/SampleClass.json") - assert SampleClass(field=loaded.field) == SampleClass(field=saved.field) + assert loaded.field == saved.field From 83872af753cbbcfe3fe89243f4665ea97b32015b Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:40:53 +0200 Subject: [PATCH 17/27] fix marshalling of None --- src/databricks/labs/blueprint/installation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index f7afa57..18a6ac1 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -476,7 +476,9 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo """The `_marshal` method is a private method that is used to serialize an object of type `type_ref` to a dictionary. This method is called by the `save` method.""" if inst is None: - return None, False + none_allowed = type_ref is types.NoneType or ( + isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref)) + return None, none_allowed if isinstance(inst, databricks.sdk.core.Config): return self._marshal_databricks_config(inst) if hasattr(inst, "as_dict"): From 80c9de93163ab15253c97acd60926b1d0a6015f5 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:41:19 +0200 Subject: [PATCH 18/27] fix marshalling of None --- src/databricks/labs/blueprint/installation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 18a6ac1..0ac4bc8 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -476,8 +476,7 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo """The `_marshal` method is a private method that is used to serialize an object of type `type_ref` to a dictionary. This method is called by the `save` method.""" if inst is None: - none_allowed = type_ref is types.NoneType or ( - isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref)) + none_allowed = type_ref is types.NoneType or (isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref)) return None, none_allowed if isinstance(inst, databricks.sdk.core.Config): return self._marshal_databricks_config(inst) From f2c92685f8522ec5737d3c87fd322ddd8bca1674 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:40:53 +0200 Subject: [PATCH 19/27] fix marshalling of None --- src/databricks/labs/blueprint/installation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index a858cd0..4a45bdd 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -474,7 +474,9 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo """The `_marshal` method is a private method that is used to serialize an object of type `type_ref` to a dictionary. This method is called by the `save` method.""" if inst is None: - return None, False + none_allowed = type_ref is types.NoneType or ( + isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref)) + return None, none_allowed if isinstance(inst, databricks.sdk.core.Config): return self._marshal_databricks_config(inst) if hasattr(inst, "as_dict"): From d73173852f24745186c307524e12a15985ba0e7c Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:43:34 +0200 Subject: [PATCH 20/27] fix marshalling of None --- src/databricks/labs/blueprint/installation.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 4a45bdd..77aead0 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -475,7 +475,8 @@ def _marshal(self, type_ref: type, path: list[str], inst: Any) -> tuple[Any, boo a dictionary. This method is called by the `save` method.""" if inst is None: none_allowed = type_ref is types.NoneType or ( - isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref)) + isinstance(type_ref, types.UnionType) and types.NoneType in get_args(type_ref) + ) return None, none_allowed if isinstance(inst, databricks.sdk.core.Config): return self._marshal_databricks_config(inst) @@ -736,7 +737,7 @@ def _unmarshal_union(cls, inst, path, type_ref): value = cls._unmarshal(inst, path, variant) if value is not None: return value - except: + except SerdeError: pass return None @@ -790,7 +791,7 @@ def _unmarshal_primitive(cls, inst, type_ref): of type `type_ref`. This method is called by the `load` method.""" if inst is None: return None - if type(inst)==type_ref: + if type(inst) == type_ref: return inst converted = inst # convert from str From f962a1f99926a600b57d309d25c779696c6c2449 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:45:35 +0200 Subject: [PATCH 21/27] rename --- tests/unit/{test_marshalling.py => test_marshalling_scenarios.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/unit/{test_marshalling.py => test_marshalling_scenarios.py} (100%) diff --git a/tests/unit/test_marshalling.py b/tests/unit/test_marshalling_scenarios.py similarity index 100% rename from tests/unit/test_marshalling.py rename to tests/unit/test_marshalling_scenarios.py From 8e9ed4c603e1719f58348fb105df1ed647a83afa Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:56:37 +0200 Subject: [PATCH 22/27] handle invalid conversion --- src/databricks/labs/blueprint/installation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 3263a67..eab61f2 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -802,7 +802,10 @@ def _unmarshal_primitive(cls, inst, type_ref): # convert from str if isinstance(inst, str): if type_ref in (int, float): - converted = type_ref(inst) # type: ignore[call-arg] + try: + converted = type_ref(inst) # type: ignore[call-arg] + except ValueError: + raise SerdeError(f"Not a number {inst}!") elif type_ref == bool: if inst.lower() == "true": converted = True From 3cf14ba0403e61b3f0bf560e0134178cc9a0cafe Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 17:56:37 +0200 Subject: [PATCH 23/27] handle invalid conversion --- src/databricks/labs/blueprint/installation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 77aead0..c0ae8d0 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -797,7 +797,10 @@ def _unmarshal_primitive(cls, inst, type_ref): # convert from str if isinstance(inst, str): if type_ref in (int, float): - converted = type_ref(inst) # type: ignore[call-arg] + try: + converted = type_ref(inst) # type: ignore[call-arg] + except ValueError: + raise SerdeError(f"Not a number {inst}!") elif type_ref == bool: if inst.lower() == "true": converted = True From 2a7645fb157b7d903eef6829864403eebc5e321f Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 18:08:42 +0200 Subject: [PATCH 24/27] fix crasher with union type --- src/databricks/labs/blueprint/installation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index c0ae8d0..5e1b04b 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -814,7 +814,8 @@ def _explain_why(type_ref: type, path: list[str], raw: Any) -> str: type. This method is called by the `_unmarshal` and `_marshal` methods.""" if raw is None: raw = "value is missing" - return f'{".".join(path)}: not a {type_ref.__name__}: {raw}' + type_name = getattr(type_ref, "__name__", str(type_ref)) + return f'{".".join(path)}: not a {type_name}: {raw}' @staticmethod def _dump_json(as_dict: Json, _: type) -> bytes: From 7fbd299374df67186e5acc7eaba2346faf21d7f9 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 18:17:58 +0200 Subject: [PATCH 25/27] fix incorrect test --- tests/unit/test_installer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_installer.py b/tests/unit/test_installer.py index 0532174..e4c959e 100644 --- a/tests/unit/test_installer.py +++ b/tests/unit/test_installer.py @@ -31,7 +31,7 @@ def test_jobs_state(): state = InstallState(ws, "blueprint") - assert {"foo": "123"} == state.jobs + assert {"foo": 123} == state.jobs assert {} == state.dashboards ws.workspace.download.assert_called_with("/Users/foo/.blueprint/state.json") From bc84426e16f98004927e4daf0ddb9e6f98661eed Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 18:21:25 +0200 Subject: [PATCH 26/27] formatting --- src/databricks/labs/blueprint/installation.py | 6 +++--- tests/unit/test_installation.py | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/blueprint/installation.py b/src/databricks/labs/blueprint/installation.py index 5e1b04b..8eeea11 100644 --- a/src/databricks/labs/blueprint/installation.py +++ b/src/databricks/labs/blueprint/installation.py @@ -791,7 +791,7 @@ def _unmarshal_primitive(cls, inst, type_ref): of type `type_ref`. This method is called by the `load` method.""" if inst is None: return None - if type(inst) == type_ref: + if isinstance(inst, type_ref): return inst converted = inst # convert from str @@ -799,8 +799,8 @@ def _unmarshal_primitive(cls, inst, type_ref): if type_ref in (int, float): try: converted = type_ref(inst) # type: ignore[call-arg] - except ValueError: - raise SerdeError(f"Not a number {inst}!") + except ValueError as exc: + raise SerdeError(f"Not a number {inst}!") from exc elif type_ref == bool: if inst.lower() == "true": converted = True diff --git a/tests/unit/test_installation.py b/tests/unit/test_installation.py index b40ac5c..aebe6f2 100644 --- a/tests/unit/test_installation.py +++ b/tests/unit/test_installation.py @@ -597,6 +597,7 @@ class SampleClass: loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved + def test_bool_in_union(): @dataclass class SampleClass: @@ -611,8 +612,8 @@ class SampleClass: JsonType: typing.TypeAlias = None | bool | int | float | str | list["JsonType"] | dict[str, "JsonType"] -def test_complex_union(): +def test_complex_union(): @dataclass class SampleClass: field: dict[str, JsonType] @@ -623,10 +624,11 @@ class SampleClass: loaded = installation.load(SampleClass, filename="backups/SampleClass.json") assert loaded == saved + JsonType2: typing.TypeAlias = dict[str, "JsonType2"] | list["JsonType2"] | str | float | int | bool | None -def test_complex_union2(): +def test_complex_union2(): @dataclass class SampleClass: field: dict[str, JsonType2] @@ -635,4 +637,4 @@ class SampleClass: saved = SampleClass(field={"a": "b"}) installation.save(saved, filename="backups/SampleClass.json") loaded = installation.load(SampleClass, filename="backups/SampleClass.json") - assert loaded == saved \ No newline at end of file + assert loaded == saved From f26c03113d9eef135d6fad688bcb92cf22637441 Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Mon, 19 May 2025 18:46:59 +0200 Subject: [PATCH 27/27] more scenarios --- tests/unit/test_marshalling_scenarios.py | 43 ++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/unit/test_marshalling_scenarios.py b/tests/unit/test_marshalling_scenarios.py index 9832b59..6670145 100644 --- a/tests/unit/test_marshalling_scenarios.py +++ b/tests/unit/test_marshalling_scenarios.py @@ -1,8 +1,10 @@ +from abc import ABC from dataclasses import dataclass from enum import Enum from typing import Any import pytest +from mypy.metastore import abstractmethod from databricks.labs.blueprint.installation import Installation, MockInstallation @@ -156,6 +158,47 @@ class RecoverySampleClass: assert loaded.field == saved.field +@pytest.mark.parametrize("type_support", [s for s in TypeSupport]) +def test_dynamic_config_data(type_support) -> None: + set_type_support(type_support) + # this example corresponds to a scenario where we store data provided + # by some object, without a schema for it + + class AbstractDriver(ABC): + + @abstractmethod + def get_config_data(self) -> object: ... + + class XDriver(AbstractDriver): + def get_config_data(self) -> object: + return "oracle:jdbc:thin://my_login:my_password@myserver:2312" + + class YDriver(AbstractDriver): + def get_config_data(self) -> object: + return { + "login": "my_login", + "password": "my_password", + "host": "myserver", + "port": 2312 + } + + @dataclass + class SampleClass: + driver_class: str + driver_config: object + + installation = MockInstallation() + saved_x = SampleClass(driver_class=type(XDriver).__name__, driver_config=XDriver().get_config_data()) + installation.save(saved_x, filename="backups/SampleDriverX.json") + saved_y = SampleClass(driver_class=type(YDriver).__name__, driver_config=YDriver().get_config_data()) + installation.save(saved_y, filename="backups/SampleDriverY.json") + loaded_x = installation.load(SampleClass, filename="backups/SampleDriverX.json") + assert loaded_x == saved_x + loaded_y = installation.load(SampleClass, filename="backups/SampleDriverY.json") + assert loaded_y == saved_y + + + @pytest.mark.parametrize("type_support", [s for s in TypeSupport]) def test_lost_code_with_any(type_support) -> None: set_type_support(type_support)