From 8a1578be2ab79200563e589e5ac643be82498463 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 15 Jun 2021 18:35:59 +0100 Subject: [PATCH 01/70] TST trigger the bug of #425 in a test --- tests/cloudpickle_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 845f27962..2be6c7009 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -604,6 +604,9 @@ def __reduce__(self): assert hasattr(depickled_mod.__builtins__, "abs") assert depickled_mod.f(-1) == 1 + # Additional check testing that the issue #425 is fixed + assert mod.f(-1) == 1 + def test_load_dynamic_module_in_grandchild_process(self): # Make sure that when loaded, a dynamic module preserves its dynamic # property. Otherwise, this will lead to an ImportError if pickled in From 1de1c4fb059de4d2f9bb85f49e6fa69d9ab8f020 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 15 Jun 2021 18:41:23 +0100 Subject: [PATCH 02/70] FIX remove the __builtins__ from a copy of the module --- cloudpickle/cloudpickle_fast.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 46a9540ec..fb9d28ac3 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -342,8 +342,9 @@ def _module_reduce(obj): if _is_importable(obj): return subimport, (obj.__name__,) else: - obj.__dict__.pop('__builtins__', None) - return dynamic_subimport, (obj.__name__, vars(obj)) + state = obj.__dict__.copy() + state.pop('__builtins__', None) + return dynamic_subimport, (obj.__name__, state) def _method_reduce(obj): From 05ebfd3396df029c5eaa3374bcfd47c42618aa03 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 15 Jun 2021 18:48:00 +0100 Subject: [PATCH 03/70] MNT document change inside CHANGES.md --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index c9b3d9384..91f57174c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,9 @@ dev === +- Fix a side effect altering dynamic modules at pickling time. + ([PR #426](https://github.com/cloudpipe/cloudpickle/pull/426)) + - Support for pickling type annotations on Python 3.10 as per [PEP 563]( https://www.python.org/dev/peps/pep-0563/) ([PR #400](https://github.com/cloudpipe/cloudpickle/pull/400)) From 38364d67e9486545a90a5e75b33af29dbbc32979 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 20 Apr 2021 09:10:13 +0100 Subject: [PATCH 04/70] Fixes #206. Adding tested deep serialisation option. Based on PR391 by kinghuang, but taking on feedback from maintainers and adding tests. --- cloudpickle/cloudpickle.py | 30 ++++++++++++++ tests/cloudpickle_test.py | 80 ++++++++++++++++++++++++++++++++++++++ tests/external.py | 5 +++ 3 files changed, 115 insertions(+) create mode 100644 tests/external.py diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 20e9a9550..3953b882f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -45,6 +45,7 @@ import builtins import dis import opcode +import inspect import platform import sys import types @@ -87,6 +88,9 @@ def g(): # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +# Names of modules whose resources should be treated as dynamic. +_DEEP_SERIALIZATION_MODULES = set() + # Track the provenance of reconstructed dynamic classes to make it possible to # reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. @@ -122,6 +126,29 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def +def register_deep_serialization(module): + module_name = module.__name__ if inspect.ismodule(module) else module + _DEEP_SERIALIZATION_MODULES.add(module_name) + + +def unregister_deep_serialization(module): + module_name = module.__name__ if inspect.ismodule(module) else module + _DEEP_SERIALIZATION_MODULES.remove(module_name) + + +def _is_explicitly_serialized_module(module, submodules=True): + module_name = module.__name__ if inspect.ismodule(module) else module + if module_name in _DEEP_SERIALIZATION_MODULES: + return True + if submodules: + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _DEEP_SERIALIZATION_MODULES: + return True + module_name = parent_name + return False def _whichmodule(obj, name): """Find the module an object belongs to. @@ -208,6 +235,9 @@ def _lookup_module_and_qualname(obj, name=None): if module_name == "__main__": return None + if _is_explicitly_serialized_module(module_name): + return None + # Note: if module_name is in sys.modules, the corresponding module is # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2be6c7009..f5b442490 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -48,7 +48,11 @@ from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname +from cloudpickle.cloudpickle import register_deep_serialization +from cloudpickle.cloudpickle import unregister_deep_serialization +from cloudpickle.cloudpickle import _is_explicitly_serialized_module +from .external import an_external_function from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script from .testutils import subprocess_worker @@ -2324,6 +2328,21 @@ def __type__(self): o = MyClass() pickle_depickle(o, protocol=self.protocol) + def test_deep_serialization_external_func(self): + reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + f1 = cloudpickle.loads(reference) + + register_deep_serialization("tests.external") + deep = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + f2 = cloudpickle.loads(deep) + unregister_deep_serialization("tests.external") + + # Ensure the serialisation is not the same + assert reference != deep + assert len(deep) > len(reference) + assert f1() == f2() + + class Protocol2CloudPickleTest(CloudPickleTest): @@ -2354,6 +2373,67 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert module is typing assert name == 'AnyStr' +def test_lookup_module_and_qualname_external_module_remove(): + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + + register_deep_serialization(_cloudpickle_testpkg) + unregister_deep_serialization(_cloudpickle_testpkg) + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + + assert module_and_name is not None + + +def test_lookup_module_and_qualname_external_module_remove_name(): + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + + register_deep_serialization("_cloudpickle_testpkg") + unregister_deep_serialization("_cloudpickle_testpkg") + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + assert module_and_name is not None + + + +def test_lookup_module_and_qualname_external_module(): + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + + register_deep_serialization(_cloudpickle_testpkg) + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + unregister_deep_serialization(_cloudpickle_testpkg) + assert module_and_name is None + +def test_lookup_module_and_qualname_external_module_name(): + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + + register_deep_serialization("_cloudpickle_testpkg") + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) + unregister_deep_serialization("_cloudpickle_testpkg") + assert module_and_name is None + +def test_deep_serialization_package_parsing_parents(): + # We should deep copy children of explicit modules, not parents + package = "foo.bar.baz" + register_deep_serialization(package) + result = _is_explicitly_serialized_module("foo.bar") + unregister_deep_serialization(package) + assert not result + +def test_deep_serialization_package_parsing_children(): + package = "foo.bar" + register_deep_serialization(package) + result = _is_explicitly_serialized_module("foo.bar.baz") + unregister_deep_serialization(package) + assert result + +def test_deep_serialization_package_parsing_children_no_submodule(): + package = "foo.bar" + register_deep_serialization(package) + result = _is_explicitly_serialized_module("foo.bar.baz", submodules=False) + unregister_deep_serialization(package) + assert not result def _all_types_to_test(): T = typing.TypeVar('T') diff --git a/tests/external.py b/tests/external.py new file mode 100644 index 000000000..af15d1565 --- /dev/null +++ b/tests/external.py @@ -0,0 +1,5 @@ +# simulate an external function which cloudpickle would normally +# only save a reference to + +def an_external_function(): + return "this is something" \ No newline at end of file From 30461116c529be7279d221ab5cc67c21646f36d6 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 20 Apr 2021 09:29:22 +0100 Subject: [PATCH 05/70] Adding newline to end of file --- tests/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/external.py b/tests/external.py index af15d1565..a4aba792d 100644 --- a/tests/external.py +++ b/tests/external.py @@ -2,4 +2,4 @@ # only save a reference to def an_external_function(): - return "this is something" \ No newline at end of file + return "this is something" From 05b0f83dbb186c220ae1a0d7720002736d309aa5 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 20 Apr 2021 09:34:06 +0100 Subject: [PATCH 06/70] Further flake8 changes --- cloudpickle/cloudpickle.py | 1 + tests/cloudpickle_test.py | 5 +++++ tests/external.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 3953b882f..72e745808 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -126,6 +126,7 @@ def _lookup_class_or_track(class_tracker_id, class_def): _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id return class_def + def register_deep_serialization(module): module_name = module.__name__ if inspect.ismodule(module) else module _DEEP_SERIALIZATION_MODULES.add(module_name) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f5b442490..5fec28d56 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2404,6 +2404,7 @@ def test_lookup_module_and_qualname_external_module(): unregister_deep_serialization(_cloudpickle_testpkg) assert module_and_name is None + def test_lookup_module_and_qualname_external_module_name(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T @@ -2413,6 +2414,7 @@ def test_lookup_module_and_qualname_external_module_name(): unregister_deep_serialization("_cloudpickle_testpkg") assert module_and_name is None + def test_deep_serialization_package_parsing_parents(): # We should deep copy children of explicit modules, not parents package = "foo.bar.baz" @@ -2421,6 +2423,7 @@ def test_deep_serialization_package_parsing_parents(): unregister_deep_serialization(package) assert not result + def test_deep_serialization_package_parsing_children(): package = "foo.bar" register_deep_serialization(package) @@ -2428,6 +2431,7 @@ def test_deep_serialization_package_parsing_children(): unregister_deep_serialization(package) assert result + def test_deep_serialization_package_parsing_children_no_submodule(): package = "foo.bar" register_deep_serialization(package) @@ -2435,6 +2439,7 @@ def test_deep_serialization_package_parsing_children_no_submodule(): unregister_deep_serialization(package) assert not result + def _all_types_to_test(): T = typing.TypeVar('T') diff --git a/tests/external.py b/tests/external.py index a4aba792d..df8f1c64d 100644 --- a/tests/external.py +++ b/tests/external.py @@ -1,4 +1,4 @@ -# simulate an external function which cloudpickle would normally +# simulate an external function which cloudpickle would normally # only save a reference to def an_external_function(): From de1b3756259067465d25d86c8fde0da489c5f31d Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 20 Apr 2021 09:36:30 +0100 Subject: [PATCH 07/70] More formatting fixes --- tests/cloudpickle_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5fec28d56..4036121c9 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2373,6 +2373,7 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert module is typing assert name == 'AnyStr' + def test_lookup_module_and_qualname_external_module_remove(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T @@ -2380,7 +2381,6 @@ def test_lookup_module_and_qualname_external_module_remove(): register_deep_serialization(_cloudpickle_testpkg) unregister_deep_serialization(_cloudpickle_testpkg) module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - assert module_and_name is not None @@ -2394,7 +2394,6 @@ def test_lookup_module_and_qualname_external_module_remove_name(): assert module_and_name is not None - def test_lookup_module_and_qualname_external_module(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T From 261f127a9449febd8af14f84a1fd46101a15a503 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Mon, 14 Jun 2021 14:20:52 +0100 Subject: [PATCH 08/70] Renaming function --- cloudpickle/cloudpickle.py | 16 +++++++++------- tests/cloudpickle_test.py | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 72e745808..d27b68674 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -89,7 +89,7 @@ def g(): DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Names of modules whose resources should be treated as dynamic. -_DEEP_SERIALIZATION_MODULES = set() +_PICKLE_BY_VALUE_MODULES = set() # Track the provenance of reconstructed dynamic classes to make it possible to # reconstruct instances from the matching singleton class definition when @@ -127,26 +127,28 @@ def _lookup_class_or_track(class_tracker_id, class_def): return class_def -def register_deep_serialization(module): +def register_pickle_by_value(module): + """Register that the input module should be pickled by value.""" module_name = module.__name__ if inspect.ismodule(module) else module - _DEEP_SERIALIZATION_MODULES.add(module_name) + _PICKLE_BY_VALUE_MODULES.add(module_name) -def unregister_deep_serialization(module): +def unregister_pickle_by_value(module): + """ nregister that the input module should be pickled by value.""" module_name = module.__name__ if inspect.ismodule(module) else module - _DEEP_SERIALIZATION_MODULES.remove(module_name) + _PICKLE_BY_VALUE_MODULES.remove(module_name) def _is_explicitly_serialized_module(module, submodules=True): module_name = module.__name__ if inspect.ismodule(module) else module - if module_name in _DEEP_SERIALIZATION_MODULES: + if module_name in _PICKLE_BY_VALUE_MODULES: return True if submodules: while True: parent_name = module_name.rsplit(".", 1)[0] if parent_name == module_name: break - if parent_name in _DEEP_SERIALIZATION_MODULES: + if parent_name in _PICKLE_BY_VALUE_MODULES: return True module_name = parent_name return False diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4036121c9..626d46f1f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -48,8 +48,8 @@ from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname -from cloudpickle.cloudpickle import register_deep_serialization -from cloudpickle.cloudpickle import unregister_deep_serialization +from cloudpickle.cloudpickle import register_pickle_by_value +from cloudpickle.cloudpickle import unregister_pickle_by_value from cloudpickle.cloudpickle import _is_explicitly_serialized_module from .external import an_external_function @@ -2332,10 +2332,10 @@ def test_deep_serialization_external_func(self): reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) f1 = cloudpickle.loads(reference) - register_deep_serialization("tests.external") + register_pickle_by_value("tests.external") deep = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) f2 = cloudpickle.loads(deep) - unregister_deep_serialization("tests.external") + unregister_pickle_by_value("tests.external") # Ensure the serialisation is not the same assert reference != deep @@ -2378,8 +2378,8 @@ def test_lookup_module_and_qualname_external_module_remove(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - register_deep_serialization(_cloudpickle_testpkg) - unregister_deep_serialization(_cloudpickle_testpkg) + register_pickle_by_value(_cloudpickle_testpkg) + unregister_pickle_by_value(_cloudpickle_testpkg) module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None @@ -2388,8 +2388,8 @@ def test_lookup_module_and_qualname_external_module_remove_name(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - register_deep_serialization("_cloudpickle_testpkg") - unregister_deep_serialization("_cloudpickle_testpkg") + register_pickle_by_value("_cloudpickle_testpkg") + unregister_pickle_by_value("_cloudpickle_testpkg") module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None @@ -2398,9 +2398,9 @@ def test_lookup_module_and_qualname_external_module(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - register_deep_serialization(_cloudpickle_testpkg) + register_pickle_by_value(_cloudpickle_testpkg) module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - unregister_deep_serialization(_cloudpickle_testpkg) + unregister_pickle_by_value(_cloudpickle_testpkg) assert module_and_name is None @@ -2408,34 +2408,34 @@ def test_lookup_module_and_qualname_external_module_name(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - register_deep_serialization("_cloudpickle_testpkg") + register_pickle_by_value("_cloudpickle_testpkg") module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - unregister_deep_serialization("_cloudpickle_testpkg") + unregister_pickle_by_value("_cloudpickle_testpkg") assert module_and_name is None def test_deep_serialization_package_parsing_parents(): # We should deep copy children of explicit modules, not parents package = "foo.bar.baz" - register_deep_serialization(package) + register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar") - unregister_deep_serialization(package) + unregister_pickle_by_value(package) assert not result def test_deep_serialization_package_parsing_children(): package = "foo.bar" - register_deep_serialization(package) + register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar.baz") - unregister_deep_serialization(package) + unregister_pickle_by_value(package) assert result def test_deep_serialization_package_parsing_children_no_submodule(): package = "foo.bar" - register_deep_serialization(package) + register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar.baz", submodules=False) - unregister_deep_serialization(package) + unregister_pickle_by_value(package) assert not result From 9dcb540d3544393b88b4579f55486ce697387422 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 12:38:38 +0100 Subject: [PATCH 09/70] Document changes in changelog --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 91f57174c..a2fd31963 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,11 @@ dev - Fix a side effect altering dynamic modules at pickling time. ([PR #426](https://github.com/cloudpipe/cloudpickle/pull/426)) + +- Support for registering modules to be serialised by value. This will + allow for code defined in local modules to be serialised and executed + remotely without those local modules installed on the remote machine. + ([PR #417](https://github.com/cloudpipe/cloudpickle/pull/417)) - Support for pickling type annotations on Python 3.10 as per [PEP 563]( https://www.python.org/dev/peps/pep-0563/) From a2ec3a256c4eb712a078801fc3c055a8ef9f958d Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 14:19:32 +0100 Subject: [PATCH 10/70] Unifying the naming convention --- tests/cloudpickle_test.py | 50 +++++++++++++++++++++------------------ tests/external.py | 2 +- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 626d46f1f..02faea572 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -44,7 +44,7 @@ import cloudpickle from cloudpickle.compat import pickle -from cloudpickle.cloudpickle import _is_importable +from cloudpickle.cloudpickle import _is_importable, _PICKLE_BY_VALUE_MODULES from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -2328,7 +2328,7 @@ def __type__(self): o = MyClass() pickle_depickle(o, protocol=self.protocol) - def test_deep_serialization_external_func(self): + def test_pickle_module_registered_for_pickling_by_value(self): reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) f1 = cloudpickle.loads(reference) @@ -2340,6 +2340,8 @@ def test_deep_serialization_external_func(self): # Ensure the serialisation is not the same assert reference != deep assert len(deep) > len(reference) + assert b"return_a_string" in deep + assert b"return_a_string" not in reference assert f1() == f2() @@ -2374,69 +2376,71 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert name == 'AnyStr' -def test_lookup_module_and_qualname_external_module_remove(): +def test_register_pickle_by_value(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T + # Test that adding and removing a module to pickle by value returns + # us to the original pickle by reference + assert len(_PICKLE_BY_VALUE_MODULES) == 0 register_pickle_by_value(_cloudpickle_testpkg) unregister_pickle_by_value(_cloudpickle_testpkg) module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None + assert len(_PICKLE_BY_VALUE_MODULES) == 0 - -def test_lookup_module_and_qualname_external_module_remove_name(): - import _cloudpickle_testpkg - T = _cloudpickle_testpkg.T - + # Test that doing the same with the string module name + # functions identically register_pickle_by_value("_cloudpickle_testpkg") unregister_pickle_by_value("_cloudpickle_testpkg") module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None + assert len(_PICKLE_BY_VALUE_MODULES) == 0 - -def test_lookup_module_and_qualname_external_module(): - import _cloudpickle_testpkg - T = _cloudpickle_testpkg.T - + # Test now that if we look up the module before removing + # we correctly get a None result, indicating to pickle + # by value register_pickle_by_value(_cloudpickle_testpkg) module_and_name = _lookup_module_and_qualname(T, name=T.__name__) unregister_pickle_by_value(_cloudpickle_testpkg) assert module_and_name is None + assert len(_PICKLE_BY_VALUE_MODULES) == 0 - -def test_lookup_module_and_qualname_external_module_name(): - import _cloudpickle_testpkg - T = _cloudpickle_testpkg.T - + # Test pickle by value behaviour using string name register_pickle_by_value("_cloudpickle_testpkg") module_and_name = _lookup_module_and_qualname(T, name=T.__name__) unregister_pickle_by_value("_cloudpickle_testpkg") assert module_and_name is None + assert len(_PICKLE_BY_VALUE_MODULES) == 0 -def test_deep_serialization_package_parsing_parents(): +def test_register_pickle_by_value_parents_and_children(): # We should deep copy children of explicit modules, not parents package = "foo.bar.baz" + assert len(_PICKLE_BY_VALUE_MODULES) == 0 register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar") unregister_pickle_by_value(package) assert not result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 - -def test_deep_serialization_package_parsing_children(): + # Given a child of a pickle by value module, we should + # pickle it by value package = "foo.bar" register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar.baz") unregister_pickle_by_value(package) assert result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 - -def test_deep_serialization_package_parsing_children_no_submodule(): + # Given a child of a pickle by value method with submodules set + # to false, we should no longer pickle it by value package = "foo.bar" register_pickle_by_value(package) result = _is_explicitly_serialized_module("foo.bar.baz", submodules=False) unregister_pickle_by_value(package) assert not result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 def _all_types_to_test(): diff --git a/tests/external.py b/tests/external.py index df8f1c64d..3bf135150 100644 --- a/tests/external.py +++ b/tests/external.py @@ -2,4 +2,4 @@ # only save a reference to def an_external_function(): - return "this is something" + return "return_a_string" From 6e882e86514bef10d356a9e52687dd843301e707 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 14:21:51 +0100 Subject: [PATCH 11/70] Updating naming conventions --- cloudpickle/cloudpickle.py | 4 ++-- tests/cloudpickle_test.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d27b68674..a5acd009b 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -139,7 +139,7 @@ def unregister_pickle_by_value(module): _PICKLE_BY_VALUE_MODULES.remove(module_name) -def _is_explicitly_serialized_module(module, submodules=True): +def is_registered_pickle_by_value(module, submodules=True): module_name = module.__name__ if inspect.ismodule(module) else module if module_name in _PICKLE_BY_VALUE_MODULES: return True @@ -238,7 +238,7 @@ def _lookup_module_and_qualname(obj, name=None): if module_name == "__main__": return None - if _is_explicitly_serialized_module(module_name): + if is_registered_pickle_by_value(module_name): return None # Note: if module_name is in sys.modules, the corresponding module is diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 02faea572..54b0749e2 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -50,7 +50,7 @@ from cloudpickle.cloudpickle import _lookup_module_and_qualname from cloudpickle.cloudpickle import register_pickle_by_value from cloudpickle.cloudpickle import unregister_pickle_by_value -from cloudpickle.cloudpickle import _is_explicitly_serialized_module +from cloudpickle.cloudpickle import is_registered_pickle_by_value from .external import an_external_function from .testutils import subprocess_pickle_echo @@ -2419,7 +2419,7 @@ def test_register_pickle_by_value_parents_and_children(): package = "foo.bar.baz" assert len(_PICKLE_BY_VALUE_MODULES) == 0 register_pickle_by_value(package) - result = _is_explicitly_serialized_module("foo.bar") + result = is_registered_pickle_by_value("foo.bar") unregister_pickle_by_value(package) assert not result assert len(_PICKLE_BY_VALUE_MODULES) == 0 @@ -2428,7 +2428,7 @@ def test_register_pickle_by_value_parents_and_children(): # pickle it by value package = "foo.bar" register_pickle_by_value(package) - result = _is_explicitly_serialized_module("foo.bar.baz") + result = is_registered_pickle_by_value("foo.bar.baz") unregister_pickle_by_value(package) assert result assert len(_PICKLE_BY_VALUE_MODULES) == 0 @@ -2437,7 +2437,7 @@ def test_register_pickle_by_value_parents_and_children(): # to false, we should no longer pickle it by value package = "foo.bar" register_pickle_by_value(package) - result = _is_explicitly_serialized_module("foo.bar.baz", submodules=False) + result = is_registered_pickle_by_value("foo.bar.baz", submodules=False) unregister_pickle_by_value(package) assert not result assert len(_PICKLE_BY_VALUE_MODULES) == 0 From cd1fde3d8c5d662fa2ff7561803243219d1fc740 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 17:23:01 +0100 Subject: [PATCH 12/70] Adding broken module test --- cloudpickle/cloudpickle.py | 29 +++-- cloudpickle/cloudpickle_fast.py | 12 +- tests/cloudpickle_test.py | 202 ++++++++++++++++++-------------- 3 files changed, 140 insertions(+), 103 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a5acd009b..9bd2b40eb 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -199,18 +199,25 @@ def _whichmodule(obj, name): return None -def _is_importable(obj, name=None): - """Dispatcher utility to test the importability of various constructs.""" - if isinstance(obj, types.FunctionType): - return _lookup_module_and_qualname(obj, name=name) is not None - elif issubclass(type(obj), type): - return _lookup_module_and_qualname(obj, name=name) is not None +def _should_pickle_by_reference(obj, name=None): + """Dispatcher utility to test whether an object should be serialised by + value or reference by using importability as a proxy.""" + if isinstance(obj, types.FunctionType) or issubclass(type(obj), + type): + module_name = _lookup_module_and_qualname(obj, name=name) + if module_name is None: + return False + module, name = module_name + return not is_registered_pickle_by_value(module.__name__) + elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added in # is sys.modules therefore a cheap and simple heuristic to tell us whether # we can assume that a given module could be imported by name in # another Python process. + if is_registered_pickle_by_value(obj.__name__): + return False return obj.__name__ in sys.modules else: raise TypeError( @@ -238,9 +245,6 @@ def _lookup_module_and_qualname(obj, name=None): if module_name == "__main__": return None - if is_registered_pickle_by_value(module_name): - return None - # Note: if module_name is in sys.modules, the corresponding module is # assumed importable at unpickling time. See #357 module = sys.modules.get(module_name, None) @@ -868,10 +872,15 @@ def _decompose_typevar(obj): def _typevar_reduce(obj): - # TypeVar instances have no __qualname__ hence we pass the name explicitly. + # TypeVar instances require the module information hence why we + # are not using the _should_pickle_by_reference directly module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) + elif is_registered_pickle_by_value(module_and_name[0].__name__): + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index fb9d28ac3..7cdc35d35 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -28,7 +28,7 @@ from .compat import pickle, Pickler from .cloudpickle import ( _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_importable, + _find_imported_submodules, _get_cell_contents, _should_pickle_by_reference, _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, @@ -339,7 +339,7 @@ def _memoryview_reduce(obj): def _module_reduce(obj): - if _is_importable(obj): + if _should_pickle_by_reference(obj): return subimport, (obj.__name__,) else: state = obj.__dict__.copy() @@ -397,7 +397,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_importable(obj): + elif not _should_pickle_by_reference(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -521,7 +521,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_importable(obj): + if _should_pickle_by_reference(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) @@ -725,7 +725,7 @@ def save_global(self, obj, name=None, pack=struct.pack): ) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_importable(obj, name=name): + elif not _should_pickle_by_reference(obj, name=name): self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) else: Pickler.save_global(self, obj, name=name) @@ -737,7 +737,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_importable(obj, name=name): + if _should_pickle_by_reference(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 54b0749e2..3eae282d3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -44,13 +44,13 @@ import cloudpickle from cloudpickle.compat import pickle -from cloudpickle.cloudpickle import _is_importable, _PICKLE_BY_VALUE_MODULES +from cloudpickle import register_pickle_by_value +from cloudpickle import unregister_pickle_by_value +from cloudpickle import is_registered_pickle_by_value +from cloudpickle.cloudpickle import _should_pickle_by_reference, _PICKLE_BY_VALUE_MODULES from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname -from cloudpickle.cloudpickle import register_pickle_by_value -from cloudpickle.cloudpickle import unregister_pickle_by_value -from cloudpickle.cloudpickle import is_registered_pickle_by_value from .external import an_external_function from .testutils import subprocess_pickle_echo @@ -712,24 +712,24 @@ def test_module_importability(self): import distutils import distutils.ccompiler - assert _is_importable(pickle) - assert _is_importable(os.path) # fake (aliased) module - assert _is_importable(distutils) # package - assert _is_importable(distutils.ccompiler) # module in package + assert _should_pickle_by_reference(pickle) + assert _should_pickle_by_reference(os.path) # fake (aliased) module + assert _should_pickle_by_reference(distutils) # package + assert _should_pickle_by_reference(distutils.ccompiler) # module in package dynamic_module = types.ModuleType('dynamic_module') - assert not _is_importable(dynamic_module) + assert not _should_pickle_by_reference(dynamic_module) if platform.python_implementation() == 'PyPy': import _codecs - assert _is_importable(_codecs) + assert _should_pickle_by_reference(_codecs) # #354: Check that modules created dynamically during the import of # their parent modules are considered importable by cloudpickle. # See the mod_with_dynamic_submodule documentation for more # details of this use case. import _cloudpickle_testpkg.mod.dynamic_submodule as m - assert _is_importable(m) + assert _should_pickle_by_reference(m) assert pickle_depickle(m, protocol=self.protocol) is m # Check for similar behavior for a module that cannot be imported by @@ -737,14 +737,14 @@ def test_module_importability(self): from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 # works only for Python 3.7+ - assert _is_importable(m2) + assert _should_pickle_by_reference(m2) assert pickle_depickle(m2, protocol=self.protocol) is m2 # Submodule_three is a dynamic module only importable via module lookup with pytest.raises(ImportError): import _cloudpickle_testpkg.mod.submodule_three # noqa from _cloudpickle_testpkg.mod import submodule_three as m3 - assert not _is_importable(m3) + assert not _should_pickle_by_reference(m3) # This module cannot be pickled using attribute lookup (as it does not # have a `__module__` attribute like classes and functions. @@ -756,12 +756,12 @@ def test_module_importability(self): # Do the same for an importable dynamic submodule inside a dynamic # module inside a file-backed module. import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa - assert _is_importable(sm) + assert _should_pickle_by_reference(sm) assert pickle_depickle(sm, protocol=self.protocol) is sm expected = "cannot check importability of object instances" with pytest.raises(TypeError, match=expected): - _is_importable(object()) + _should_pickle_by_reference(object()) def test_Ellipsis(self): self.assertEqual(Ellipsis, @@ -2329,20 +2329,44 @@ def __type__(self): pickle_depickle(o, protocol=self.protocol) def test_pickle_module_registered_for_pickling_by_value(self): - reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) - f1 = cloudpickle.loads(reference) + try: + reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + f1 = cloudpickle.loads(reference) + + register_pickle_by_value("tests.external") + deep = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + f2 = cloudpickle.loads(deep) + unregister_pickle_by_value("tests.external") + + # Ensure the serialisation is not the same + assert reference != deep + assert len(deep) > len(reference) + assert b"return_a_string" in deep + assert b"return_a_string" not in reference + assert f1() == f2() + finally: + _PICKLE_BY_VALUE_MODULES.clear() - register_pickle_by_value("tests.external") - deep = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) - f2 = cloudpickle.loads(deep) - unregister_pickle_by_value("tests.external") + def test_pickle_entire_module_by_value(self): + import _cloudpickle_testpkg.mod as m - # Ensure the serialisation is not the same - assert reference != deep - assert len(deep) > len(reference) - assert b"return_a_string" in deep - assert b"return_a_string" not in reference - assert f1() == f2() + try: + reference = cloudpickle.dumps(m, protocol=cloudpickle.DEFAULT_PROTOCOL) + m1 = cloudpickle.loads(reference) + + register_pickle_by_value("_cloudpickle_testpkg") + deep = cloudpickle.dumps(m, protocol=cloudpickle.DEFAULT_PROTOCOL) + m2 = cloudpickle.loads(deep) + unregister_pickle_by_value("_cloudpickle_testpkg") + + # Ensure the serialisation is not the same + assert reference != deep + assert len(deep) > len(reference) + assert b"hello from a package" in deep + assert b"hello from a package" not in reference + assert m1.package_function() == m2.package_function() + finally: + _PICKLE_BY_VALUE_MODULES.clear() @@ -2380,68 +2404,72 @@ def test_register_pickle_by_value(): import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - # Test that adding and removing a module to pickle by value returns - # us to the original pickle by reference - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - register_pickle_by_value(_cloudpickle_testpkg) - unregister_pickle_by_value(_cloudpickle_testpkg) - module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - assert module_and_name is not None - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test that doing the same with the string module name - # functions identically - register_pickle_by_value("_cloudpickle_testpkg") - unregister_pickle_by_value("_cloudpickle_testpkg") - module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - assert module_and_name is not None - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test now that if we look up the module before removing - # we correctly get a None result, indicating to pickle - # by value - register_pickle_by_value(_cloudpickle_testpkg) - module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - unregister_pickle_by_value(_cloudpickle_testpkg) - assert module_and_name is None - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test pickle by value behaviour using string name - register_pickle_by_value("_cloudpickle_testpkg") - module_and_name = _lookup_module_and_qualname(T, name=T.__name__) - unregister_pickle_by_value("_cloudpickle_testpkg") - assert module_and_name is None - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - + try: + # Test that adding and removing a module to pickle by value returns + # us to the original pickle by reference + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + register_pickle_by_value(_cloudpickle_testpkg) + unregister_pickle_by_value(_cloudpickle_testpkg) + pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + assert pickle_by_ref + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + + # Test that doing the same with the string module name + # functions identically + register_pickle_by_value("_cloudpickle_testpkg") + unregister_pickle_by_value("_cloudpickle_testpkg") + pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + assert pickle_by_ref + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + + # Test now that if we look up the module before removing + # we correctly get a None result, indicating to pickle + # by value + register_pickle_by_value(_cloudpickle_testpkg) + pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + unregister_pickle_by_value(_cloudpickle_testpkg) + assert not pickle_by_ref + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + + # Test pickle by value behaviour using string name + register_pickle_by_value("_cloudpickle_testpkg") + pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + unregister_pickle_by_value("_cloudpickle_testpkg") + assert not pickle_by_ref + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + finally: + _PICKLE_BY_VALUE_MODULES.clear() def test_register_pickle_by_value_parents_and_children(): - # We should deep copy children of explicit modules, not parents - package = "foo.bar.baz" - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar") - unregister_pickle_by_value(package) - assert not result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Given a child of a pickle by value module, we should - # pickle it by value - package = "foo.bar" - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar.baz") - unregister_pickle_by_value(package) - assert result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Given a child of a pickle by value method with submodules set - # to false, we should no longer pickle it by value - package = "foo.bar" - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar.baz", submodules=False) - unregister_pickle_by_value(package) - assert not result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - + try: + # We should deep copy children of explicit modules, not parents + package = "foo.bar.baz" + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + register_pickle_by_value(package) + result = is_registered_pickle_by_value("foo.bar") + unregister_pickle_by_value(package) + assert not result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + + # Given a child of a pickle by value module, we should + # pickle it by value + package = "foo.bar" + register_pickle_by_value(package) + result = is_registered_pickle_by_value("foo.bar.baz") + unregister_pickle_by_value(package) + assert result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + + # Given a child of a pickle by value method with submodules set + # to false, we should no longer pickle it by value + package = "foo.bar" + register_pickle_by_value(package) + result = is_registered_pickle_by_value("foo.bar.baz", submodules=False) + unregister_pickle_by_value(package) + assert not result + assert len(_PICKLE_BY_VALUE_MODULES) == 0 + finally: + _PICKLE_BY_VALUE_MODULES.clear() def _all_types_to_test(): T = typing.TypeVar('T') From 50d56a984bf88f81302fc67cf4f676e731e1403d Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 17:49:06 +0100 Subject: [PATCH 13/70] Resolving protocol --- tests/cloudpickle_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3eae282d3..fecd80291 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2330,11 +2330,11 @@ def __type__(self): def test_pickle_module_registered_for_pickling_by_value(self): try: - reference = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + reference = cloudpickle.dumps(an_external_function, protocol=self.protocol) f1 = cloudpickle.loads(reference) register_pickle_by_value("tests.external") - deep = cloudpickle.dumps(an_external_function, protocol=cloudpickle.DEFAULT_PROTOCOL) + deep = cloudpickle.dumps(an_external_function, protocol=self.protocol) f2 = cloudpickle.loads(deep) unregister_pickle_by_value("tests.external") @@ -2348,14 +2348,14 @@ def test_pickle_module_registered_for_pickling_by_value(self): _PICKLE_BY_VALUE_MODULES.clear() def test_pickle_entire_module_by_value(self): - import _cloudpickle_testpkg.mod as m + import _cloudpickle_testpkg as m try: - reference = cloudpickle.dumps(m, protocol=cloudpickle.DEFAULT_PROTOCOL) + reference = cloudpickle.dumps(m, protocol=self.protocol) m1 = cloudpickle.loads(reference) register_pickle_by_value("_cloudpickle_testpkg") - deep = cloudpickle.dumps(m, protocol=cloudpickle.DEFAULT_PROTOCOL) + deep = cloudpickle.dumps(m, protocol=self.protocol) m2 = cloudpickle.loads(deep) unregister_pickle_by_value("_cloudpickle_testpkg") From 898cf05d4738253a91a2781a86b6f4c367977a4a Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 15 Jun 2021 19:08:49 +0100 Subject: [PATCH 14/70] Adding instnace check for TypeVar --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 9bd2b40eb..8b92424ca 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -203,7 +203,7 @@ def _should_pickle_by_reference(obj, name=None): """Dispatcher utility to test whether an object should be serialised by value or reference by using importability as a proxy.""" if isinstance(obj, types.FunctionType) or issubclass(type(obj), - type): + type) or isinstance(obj, typing.TypeVar): module_name = _lookup_module_and_qualname(obj, name=name) if module_name is None: return False From fd9d25089deca9431febfc7fdec1d02ca0a74d24 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 19 Jun 2021 22:01:11 +0100 Subject: [PATCH 15/70] CLN fix linting errors --- cloudpickle/cloudpickle.py | 12 ++++++------ tests/cloudpickle_test.py | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8b92424ca..fcca058a3 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -200,10 +200,10 @@ def _whichmodule(obj, name): def _should_pickle_by_reference(obj, name=None): - """Dispatcher utility to test whether an object should be serialised by + """Dispatcher utility to test whether an object should be serialised by value or reference by using importability as a proxy.""" - if isinstance(obj, types.FunctionType) or issubclass(type(obj), - type) or isinstance(obj, typing.TypeVar): + if (isinstance(obj, types.FunctionType) or issubclass(type(obj),type) or + isinstance(obj, typing.TypeVar): module_name = _lookup_module_and_qualname(obj, name=name) if module_name is None: return False @@ -213,9 +213,9 @@ def _should_pickle_by_reference(obj, name=None): elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added in - # is sys.modules therefore a cheap and simple heuristic to tell us whether - # we can assume that a given module could be imported by name in - # another Python process. + # is sys.modules therefore a cheap and simple heuristic to tell us + # whether we can assume that a given module could be imported by name + # in another Python process. if is_registered_pickle_by_value(obj.__name__): return False return obj.__name__ in sys.modules diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b96142ae3..19853a800 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2364,7 +2364,7 @@ def test_pickle_entire_module_by_value(self): deep = cloudpickle.dumps(m, protocol=self.protocol) m2 = cloudpickle.loads(deep) unregister_pickle_by_value("_cloudpickle_testpkg") - + # Ensure the serialisation is not the same assert reference != deep assert len(deep) > len(reference) @@ -2420,8 +2420,8 @@ def test_register_pickle_by_value(): assert pickle_by_ref assert len(_PICKLE_BY_VALUE_MODULES) == 0 - # Test that doing the same with the string module name - # functions identically + # Test that doing the same with the string module name functions + # identically register_pickle_by_value("_cloudpickle_testpkg") unregister_pickle_by_value("_cloudpickle_testpkg") pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) @@ -2446,6 +2446,7 @@ def test_register_pickle_by_value(): finally: _PICKLE_BY_VALUE_MODULES.clear() + def test_register_pickle_by_value_parents_and_children(): try: # We should deep copy children of explicit modules, not parents From 5f841562209f1a52ab734cbcbd57a067dd52f477 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 19 Jun 2021 22:03:33 +0100 Subject: [PATCH 16/70] fixup! CLN fix linting errors --- cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fcca058a3..44099b96d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -202,8 +202,8 @@ def _whichmodule(obj, name): def _should_pickle_by_reference(obj, name=None): """Dispatcher utility to test whether an object should be serialised by value or reference by using importability as a proxy.""" - if (isinstance(obj, types.FunctionType) or issubclass(type(obj),type) or - isinstance(obj, typing.TypeVar): + if (isinstance(obj, types.FunctionType) or issubclass(type(obj), type) or + isinstance(obj, typing.TypeVar)): module_name = _lookup_module_and_qualname(obj, name=name) if module_name is None: return False From 14067eb36e6501e1281fe29841436ed0411a52b5 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Mon, 21 Jun 2021 15:20:13 +0100 Subject: [PATCH 17/70] Making separate typevar chec, and using functions for the `_should_pickle_by_reference` test --- tests/cloudpickle_test.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 19853a800..588d0b8b6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2353,6 +2353,28 @@ def test_pickle_module_registered_for_pickling_by_value(self): finally: _PICKLE_BY_VALUE_MODULES.clear() + def test_pickle_typevar_module_by_value(self): + try: + import _cloudpickle_testpkg + T = _cloudpickle_testpkg.T + reference = cloudpickle.dumps(T, protocol=self.protocol) + f1 = cloudpickle.loads(reference) + + register_pickle_by_value(_cloudpickle_testpkg) + deep = cloudpickle.dumps(T, protocol=self.protocol) + f2 = cloudpickle.loads(deep) + unregister_pickle_by_value(_cloudpickle_testpkg) + + # Ensure the serialisation is not the same + assert reference != deep + assert b"typevar" in deep + assert b"typevar" not in reference + assert b"getattr" in reference + assert b"getattr" not in deep + assert f1 == f2 + finally: + _PICKLE_BY_VALUE_MODULES.clear() + def test_pickle_entire_module_by_value(self): import _cloudpickle_testpkg as m @@ -2408,7 +2430,7 @@ def test_lookup_module_and_qualname_stdlib_typevar(): def test_register_pickle_by_value(): import _cloudpickle_testpkg - T = _cloudpickle_testpkg.T + fn = _cloudpickle_testpkg.package_function try: # Test that adding and removing a module to pickle by value returns @@ -2416,7 +2438,7 @@ def test_register_pickle_by_value(): assert len(_PICKLE_BY_VALUE_MODULES) == 0 register_pickle_by_value(_cloudpickle_testpkg) unregister_pickle_by_value(_cloudpickle_testpkg) - pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) assert pickle_by_ref assert len(_PICKLE_BY_VALUE_MODULES) == 0 @@ -2424,7 +2446,7 @@ def test_register_pickle_by_value(): # identically register_pickle_by_value("_cloudpickle_testpkg") unregister_pickle_by_value("_cloudpickle_testpkg") - pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) assert pickle_by_ref assert len(_PICKLE_BY_VALUE_MODULES) == 0 @@ -2432,14 +2454,14 @@ def test_register_pickle_by_value(): # we correctly get a None result, indicating to pickle # by value register_pickle_by_value(_cloudpickle_testpkg) - pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) unregister_pickle_by_value(_cloudpickle_testpkg) assert not pickle_by_ref assert len(_PICKLE_BY_VALUE_MODULES) == 0 # Test pickle by value behaviour using string name register_pickle_by_value("_cloudpickle_testpkg") - pickle_by_ref = _should_pickle_by_reference(T, name=T.__name__) + pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) unregister_pickle_by_value("_cloudpickle_testpkg") assert not pickle_by_ref assert len(_PICKLE_BY_VALUE_MODULES) == 0 From cce47089b44cacc2c57a0b7b4d2e9466e0ab2fac Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 22 Jun 2021 17:35:27 +0200 Subject: [PATCH 18/70] cosmit --- CHANGES.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 3990fb008..ddad50be5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,7 +11,6 @@ dev - Fix a side effect altering dynamic modules at pickling time. ([PR #426](https://github.com/cloudpipe/cloudpickle/pull/426)) - - Support for pickling type annotations on Python 3.10 as per [PEP 563]( https://www.python.org/dev/peps/pep-0563/) ([PR #400](https://github.com/cloudpipe/cloudpickle/pull/400)) From 14a5cfe5e7e7c407e93ea179aa75f7f017cbc72b Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 22 Jun 2021 16:36:27 +0100 Subject: [PATCH 19/70] Update cloudpickle/cloudpickle.py Co-authored-by: Olivier Grisel --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 44099b96d..c1c314e46 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -134,7 +134,7 @@ def register_pickle_by_value(module): def unregister_pickle_by_value(module): - """ nregister that the input module should be pickled by value.""" + """Unregister that the input module should be pickled by value.""" module_name = module.__name__ if inspect.ismodule(module) else module _PICKLE_BY_VALUE_MODULES.remove(module_name) From ab57df866c883cd3bbc3be42a4c67746c3babf25 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 22 Jun 2021 17:15:25 +0100 Subject: [PATCH 20/70] Update tests/cloudpickle_test.py Co-authored-by: Olivier Grisel --- tests/cloudpickle_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 588d0b8b6..6f6a83333 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2336,19 +2336,19 @@ def __type__(self): def test_pickle_module_registered_for_pickling_by_value(self): try: - reference = cloudpickle.dumps(an_external_function, protocol=self.protocol) - f1 = cloudpickle.loads(reference) + by_ref = cloudpickle.dumps(an_external_function, protocol=self.protocol) + f1 = cloudpickle.loads(by_ref) register_pickle_by_value("tests.external") - deep = cloudpickle.dumps(an_external_function, protocol=self.protocol) - f2 = cloudpickle.loads(deep) + by_value = cloudpickle.dumps(an_external_function, protocol=self.protocol) + f2 = cloudpickle.loads(by_value) unregister_pickle_by_value("tests.external") # Ensure the serialisation is not the same - assert reference != deep - assert len(deep) > len(reference) - assert b"return_a_string" in deep - assert b"return_a_string" not in reference + assert by_ref != by_value + assert len(by_value) > len(by_ref) + assert b"return_a_string" in by_value + assert b"return_a_string" not in by_ref assert f1() == f2() finally: _PICKLE_BY_VALUE_MODULES.clear() From cfcf843a65c3b26ce2bc8b9314ef064982a69f0d Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Tue, 22 Jun 2021 17:17:39 +0100 Subject: [PATCH 21/70] Updating variable names --- tests/cloudpickle_test.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6f6a83333..0d82bce06 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2357,20 +2357,20 @@ def test_pickle_typevar_module_by_value(self): try: import _cloudpickle_testpkg T = _cloudpickle_testpkg.T - reference = cloudpickle.dumps(T, protocol=self.protocol) - f1 = cloudpickle.loads(reference) + by_ref = cloudpickle.dumps(T, protocol=self.protocol) + f1 = cloudpickle.loads(by_ref) register_pickle_by_value(_cloudpickle_testpkg) - deep = cloudpickle.dumps(T, protocol=self.protocol) - f2 = cloudpickle.loads(deep) + by_value = cloudpickle.dumps(T, protocol=self.protocol) + f2 = cloudpickle.loads(by_value) unregister_pickle_by_value(_cloudpickle_testpkg) # Ensure the serialisation is not the same - assert reference != deep - assert b"typevar" in deep - assert b"typevar" not in reference - assert b"getattr" in reference - assert b"getattr" not in deep + assert by_ref != by_value + assert b"typevar" in by_value + assert b"typevar" not in by_ref + assert b"getattr" in by_ref + assert b"getattr" not in by_value assert f1 == f2 finally: _PICKLE_BY_VALUE_MODULES.clear() @@ -2379,19 +2379,19 @@ def test_pickle_entire_module_by_value(self): import _cloudpickle_testpkg as m try: - reference = cloudpickle.dumps(m, protocol=self.protocol) - m1 = cloudpickle.loads(reference) + by_ref = cloudpickle.dumps(m, protocol=self.protocol) + m1 = cloudpickle.loads(by_ref) register_pickle_by_value("_cloudpickle_testpkg") - deep = cloudpickle.dumps(m, protocol=self.protocol) - m2 = cloudpickle.loads(deep) + by_value = cloudpickle.dumps(m, protocol=self.protocol) + m2 = cloudpickle.loads(by_value) unregister_pickle_by_value("_cloudpickle_testpkg") # Ensure the serialisation is not the same - assert reference != deep - assert len(deep) > len(reference) - assert b"hello from a package" in deep - assert b"hello from a package" not in reference + assert by_ref != by_value + assert len(by_value) > len(by_ref) + assert b"hello from a package" in by_value + assert b"hello from a package" not in by_ref assert m1.package_function() == m2.package_function() finally: _PICKLE_BY_VALUE_MODULES.clear() @@ -2471,7 +2471,7 @@ def test_register_pickle_by_value(): def test_register_pickle_by_value_parents_and_children(): try: - # We should deep copy children of explicit modules, not parents + # We should by value copy children of explicit modules, not parents package = "foo.bar.baz" assert len(_PICKLE_BY_VALUE_MODULES) == 0 register_pickle_by_value(package) From cc04c2bccdefa782893e498c98d976a480c97c5a Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Wed, 30 Jun 2021 12:34:21 +0100 Subject: [PATCH 22/70] Adding another unit test to check local module patching --- tests/cloudpickle_test.py | 51 +++++++++++++++++++++++++++++++++++++++ tests/external.py | 7 +++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0d82bce06..effffc6f9 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -52,6 +52,7 @@ from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname +from tests import external from .external import an_external_function from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -2353,6 +2354,56 @@ def test_pickle_module_registered_for_pickling_by_value(self): finally: _PICKLE_BY_VALUE_MODULES.clear() + def test_nested_function_for_pickling_by_value(self): + original_var = external.mutable_variable[0] + original_var2 = external.mutable_variable2[0] + original_func = external.an_external_function + try: + by_ref = cloudpickle.dumps(external.nested_function, protocol=self.protocol) + + register_pickle_by_value("tests.external") + by_value = cloudpickle.dumps(external.nested_function, protocol=self.protocol) + + # Here we exploit the mutable variables and update it, + # to ensure that even nested dependencies are saved correctly + # This will change the data in the underlying module + external.mutable_variable[0] = "modified" + external.mutable_variable2[0] = "_suffix" + + # Importing as a non-relative package allows us to properly + # moneky-patch the function called by nested_function, + # rather than just changing a local reference to the module func + external.an_external_function = lambda: "newfunc" + + # Loading by reference should pick up these local changes + # to the variables and the patch. This simulates + # module differences between the save env and load env + f1 = cloudpickle.loads(by_ref) + + # Loading by value should not care about local module differences + # and should respect only the save env + f2 = cloudpickle.loads(by_value) + unregister_pickle_by_value("tests.external") + + # Ensure the serialisation is not the same + assert by_ref != by_value + assert len(by_value) > len(by_ref) + + # Ensure that the original content at the time + # of pickling is correct + assert b"return_a_string" in by_value + assert b"return_a_string" not in by_ref + + # Ensure even if the local module is different, it + # doesnt get used + assert f1() == "newfunc_suffix" + assert f2() == "return_a_string_nested" + finally: + external.an_external_function = original_func + external.mutable_variable[0] = original_var + external.mutable_variable2[0] = original_var2 + _PICKLE_BY_VALUE_MODULES.clear() + def test_pickle_typevar_module_by_value(self): try: import _cloudpickle_testpkg diff --git a/tests/external.py b/tests/external.py index 3bf135150..ce4913609 100644 --- a/tests/external.py +++ b/tests/external.py @@ -1,5 +1,10 @@ # simulate an external function which cloudpickle would normally # only save a reference to +mutable_variable = ["return_a_string"] +mutable_variable2 = ["_nested"] def an_external_function(): - return "return_a_string" + return mutable_variable[0] + +def nested_function(): + return an_external_function() + mutable_variable2[0] \ No newline at end of file From c689adc96ae14107d4d7223d372650e82d25a063 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Wed, 30 Jun 2021 14:06:00 +0100 Subject: [PATCH 23/70] Updating names and comments --- tests/cloudpickle_test.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index effffc6f9..2a742745b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2378,11 +2378,11 @@ def test_nested_function_for_pickling_by_value(self): # Loading by reference should pick up these local changes # to the variables and the patch. This simulates # module differences between the save env and load env - f1 = cloudpickle.loads(by_ref) + f_by_ref = cloudpickle.loads(by_ref) # Loading by value should not care about local module differences # and should respect only the save env - f2 = cloudpickle.loads(by_value) + f_by_val = cloudpickle.loads(by_value) unregister_pickle_by_value("tests.external") # Ensure the serialisation is not the same @@ -2394,10 +2394,10 @@ def test_nested_function_for_pickling_by_value(self): assert b"return_a_string" in by_value assert b"return_a_string" not in by_ref - # Ensure even if the local module is different, it - # doesnt get used - assert f1() == "newfunc_suffix" - assert f2() == "return_a_string_nested" + # Ensure even that any mutation of the module does not + # impact the behavior of the function if it was pickled by value. + assert f_by_ref() == "newfunc_suffix" + assert f_by_val() == "return_a_string_nested" finally: external.an_external_function = original_func external.mutable_variable[0] = original_var From bad9f7527d2660de8f09132cc6ad1475143596ac Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Wed, 30 Jun 2021 15:06:27 +0100 Subject: [PATCH 24/70] I am a savage that didnt run flake8 and I apologise --- tests/cloudpickle_test.py | 4 ++-- tests/external.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2a742745b..bdc370dbc 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -52,7 +52,7 @@ from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname -from tests import external +from tests import external from .external import an_external_function from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -2364,7 +2364,7 @@ def test_nested_function_for_pickling_by_value(self): register_pickle_by_value("tests.external") by_value = cloudpickle.dumps(external.nested_function, protocol=self.protocol) - # Here we exploit the mutable variables and update it, + # Here we exploit the mutable variables and update it, # to ensure that even nested dependencies are saved correctly # This will change the data in the underlying module external.mutable_variable[0] = "modified" diff --git a/tests/external.py b/tests/external.py index ce4913609..2abbb1d4b 100644 --- a/tests/external.py +++ b/tests/external.py @@ -3,8 +3,10 @@ mutable_variable = ["return_a_string"] mutable_variable2 = ["_nested"] + def an_external_function(): return mutable_variable[0] + def nested_function(): - return an_external_function() + mutable_variable2[0] \ No newline at end of file + return an_external_function() + mutable_variable2[0] From b29547efc56a2394c95a317ffa5d7cdf15027c1a Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Thu, 1 Jul 2021 10:21:29 +0100 Subject: [PATCH 25/70] Updating function and variable names --- tests/cloudpickle_test.py | 32 +++++++++++++++++--------------- tests/external.py | 10 +++++----- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index b77481385..62ddf2e3f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -54,7 +54,7 @@ from cloudpickle.cloudpickle import _lookup_module_and_qualname from tests import external -from .external import an_external_function +from .external import inner_function from .testutils import subprocess_pickle_echo from .testutils import subprocess_pickle_string from .testutils import assert_run_python_script @@ -2340,19 +2340,19 @@ def __type__(self): def test_pickle_module_registered_for_pickling_by_value(self): try: - by_ref = cloudpickle.dumps(an_external_function, protocol=self.protocol) + by_ref = cloudpickle.dumps(inner_function, protocol=self.protocol) f1 = cloudpickle.loads(by_ref) register_pickle_by_value("tests.external") - by_value = cloudpickle.dumps(an_external_function, protocol=self.protocol) + by_value = cloudpickle.dumps(inner_function, protocol=self.protocol) f2 = cloudpickle.loads(by_value) unregister_pickle_by_value("tests.external") # Ensure the serialisation is not the same assert by_ref != by_value assert len(by_value) > len(by_ref) - assert b"return_a_string" in by_value - assert b"return_a_string" not in by_ref + assert b"original_string" in by_value + assert b"original_string" not in by_ref assert f1() == f2() finally: _PICKLE_BY_VALUE_MODULES.clear() @@ -2360,12 +2360,12 @@ def test_pickle_module_registered_for_pickling_by_value(self): def test_nested_function_for_pickling_by_value(self): original_var = external.mutable_variable[0] original_var2 = external.mutable_variable2[0] - original_func = external.an_external_function + original_func = external.inner_function try: - by_ref = cloudpickle.dumps(external.nested_function, protocol=self.protocol) + by_ref = cloudpickle.dumps(external.wrapping_func, protocol=self.protocol) register_pickle_by_value("tests.external") - by_value = cloudpickle.dumps(external.nested_function, protocol=self.protocol) + by_value = cloudpickle.dumps(external.wrapping_func, protocol=self.protocol) # Here we exploit the mutable variables and update it, # to ensure that even nested dependencies are saved correctly @@ -2374,9 +2374,9 @@ def test_nested_function_for_pickling_by_value(self): external.mutable_variable2[0] = "_suffix" # Importing as a non-relative package allows us to properly - # moneky-patch the function called by nested_function, - # rather than just changing a local reference to the module func - external.an_external_function = lambda: "newfunc" + # monkey-patch the function calls, rather than just + # changing a local reference to the module func + external.inner_function = lambda: "newfunc" # Loading by reference should pick up these local changes # to the variables and the patch. This simulates @@ -2394,15 +2394,17 @@ def test_nested_function_for_pickling_by_value(self): # Ensure that the original content at the time # of pickling is correct - assert b"return_a_string" in by_value - assert b"return_a_string" not in by_ref + assert b"original_string" in by_value + assert b"second_string" in by_value + assert b"original_string" not in by_ref + assert b"second_string" not in by_ref # Ensure even that any mutation of the module does not # impact the behavior of the function if it was pickled by value. assert f_by_ref() == "newfunc_suffix" - assert f_by_val() == "return_a_string_nested" + assert f_by_val() == "original_string_second_string" finally: - external.an_external_function = original_func + external.inner_function = original_func external.mutable_variable[0] = original_var external.mutable_variable2[0] = original_var2 _PICKLE_BY_VALUE_MODULES.clear() diff --git a/tests/external.py b/tests/external.py index 2abbb1d4b..c886f13d1 100644 --- a/tests/external.py +++ b/tests/external.py @@ -1,12 +1,12 @@ # simulate an external function which cloudpickle would normally # only save a reference to -mutable_variable = ["return_a_string"] -mutable_variable2 = ["_nested"] +mutable_variable = ["original_string"] +mutable_variable2 = ["_second_string"] -def an_external_function(): +def inner_function(): return mutable_variable[0] -def nested_function(): - return an_external_function() + mutable_variable2[0] +def wrapping_func(): + return inner_function() + mutable_variable2[0] From bd4967f5d4d3d60e54be2f128802ce41d4b8f5b0 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 5 Jul 2021 00:29:35 +0100 Subject: [PATCH 26/70] highlight possible bugs silenced by pytest --- tests/external.py | 6 +++++- tests/external_two.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/external_two.py diff --git a/tests/external.py b/tests/external.py index c886f13d1..ffe841d5c 100644 --- a/tests/external.py +++ b/tests/external.py @@ -9,4 +9,8 @@ def inner_function(): def wrapping_func(): - return inner_function() + mutable_variable2[0] + from .external_two import function_from_external_two + return ( + inner_function() + mutable_variable2[0] + + function_from_external_two() + ) diff --git a/tests/external_two.py b/tests/external_two.py new file mode 100644 index 000000000..064585be9 --- /dev/null +++ b/tests/external_two.py @@ -0,0 +1,2 @@ +def function_from_external_two(): + return "" From d83e25aac02f4bf8abcbfb2e15572ec0b27b6880 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 11 Jul 2021 16:56:19 +0100 Subject: [PATCH 27/70] DOC Present the feature in the README --- README.md | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/README.md b/README.md index 5229d57c4..0f3524e86 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,51 @@ Pickling a function interactively defined in a Python shell session 85 ``` + +Overriding pickle's serialization mechanism for importable constructs: +---------------------------------------------------------------------- + +An important difference between `cloudpickle` and `pickle` is that +`cloudpickle` can serialize a function or class **by value**, whereas `pickle` +can only serialize it **by reference**, e.g. by serializing its *module +attribute path* (such as `my_module.my_function`). + +By default, `cloudpickle` only uses serialization by value in cases where +serialization by reference is usually ineffective, for example when the +function/class to be pickled was constructed in an interactive Python session. + +Since `cloudpickle 1.7.0`, it is possible to extend the use of serialization by +value to functions or classes coming from **any pure Python module**. This feature +is useful when the said module is unavailable in the unpickling environment +(making traditional serialization by reference ineffective). To this end, +`cloudpickle` exposes the +`register_pickle_by_value`/`unregister_pickle_by_value` functions: + +```python +>>> import cloudpickle +>>> import my_module +>>> cloudpickle.register_pickle_by_value(my_module) +>>> # cloudpickle.register_pickle_by_value("my_module") also works +>>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by value +>>> cloudpickle.unregister_pickle_by_value(my_module) +>>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by reference +``` + +Note that this feature is still **experimental**, and may fail in the following +situations: + +- If the body of a function/class pickled by value contains an `import` statement: + ```python + >>> def f(): + >>> ... from another_module import g + >>> ... # calling f in the unpickling environment may fail if another_module + >>> ... # is unavailable + >>> ... return g() + 1 + ``` + +- If a function pickled by reference uses a function pickled by value during its execution. + + Running the tests ----------------- From a7725bd823ef73e03ac0e729c208b7687f7395f6 Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Sun, 11 Jul 2021 17:41:30 +0100 Subject: [PATCH 28/70] Removing submodules from arg as it is not directly used --- cloudpickle/cloudpickle.py | 17 ++++++++--------- tests/cloudpickle_test.py | 9 --------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e4c790ec1..bdfd02c56 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -139,18 +139,17 @@ def unregister_pickle_by_value(module): _PICKLE_BY_VALUE_MODULES.remove(module_name) -def is_registered_pickle_by_value(module, submodules=True): +def is_registered_pickle_by_value(module): module_name = module.__name__ if inspect.ismodule(module) else module if module_name in _PICKLE_BY_VALUE_MODULES: return True - if submodules: - while True: - parent_name = module_name.rsplit(".", 1)[0] - if parent_name == module_name: - break - if parent_name in _PICKLE_BY_VALUE_MODULES: - return True - module_name = parent_name + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _PICKLE_BY_VALUE_MODULES: + return True + module_name = parent_name return False def _whichmodule(obj, name): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 62ddf2e3f..0e42d6192 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2569,15 +2569,6 @@ def test_register_pickle_by_value_parents_and_children(): unregister_pickle_by_value(package) assert result assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Given a child of a pickle by value method with submodules set - # to false, we should no longer pickle it by value - package = "foo.bar" - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar.baz", submodules=False) - unregister_pickle_by_value(package) - assert not result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 finally: _PICKLE_BY_VALUE_MODULES.clear() From c799711dcaf760f11bd24ff00f859a63d32884f0 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 12 Jul 2021 22:46:47 +0100 Subject: [PATCH 29/70] TST rework the test structure --- tests/cloudpickle_test.py | 84 ++++++++++++++------------------------- 1 file changed, 29 insertions(+), 55 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0e42d6192..423e931b8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2509,69 +2509,43 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert name == 'AnyStr' -def test_register_pickle_by_value(): +@pytest.mark.parametrize("use_module_name", [True, False]) +def test_register_pickle_by_value(use_module_name): import _cloudpickle_testpkg - fn = _cloudpickle_testpkg.package_function + + if use_module_name: + package_ref = "_cloudpickle_testpkg" + module_ref = "_cloudpickle_testpkg.mod" + else: + package_ref = _cloudpickle_testpkg + module_ref = _cloudpickle_testpkg.mod try: - # Test that adding and removing a module to pickle by value returns - # us to the original pickle by reference - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - register_pickle_by_value(_cloudpickle_testpkg) - unregister_pickle_by_value(_cloudpickle_testpkg) - pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) - assert pickle_by_ref - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test that doing the same with the string module name functions - # identically - register_pickle_by_value("_cloudpickle_testpkg") - unregister_pickle_by_value("_cloudpickle_testpkg") - pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) - assert pickle_by_ref - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test now that if we look up the module before removing - # we correctly get a None result, indicating to pickle - # by value - register_pickle_by_value(_cloudpickle_testpkg) - pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) - unregister_pickle_by_value(_cloudpickle_testpkg) - assert not pickle_by_ref - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Test pickle by value behaviour using string name - register_pickle_by_value("_cloudpickle_testpkg") - pickle_by_ref = _should_pickle_by_reference(fn, name=fn.__name__) - unregister_pickle_by_value("_cloudpickle_testpkg") - assert not pickle_by_ref - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - finally: - _PICKLE_BY_VALUE_MODULES.clear() + assert not is_registered_pickle_by_value(package_ref) + assert not is_registered_pickle_by_value(module_ref) + register_pickle_by_value(package_ref) + assert is_registered_pickle_by_value(package_ref) + + # registering a package for pickling by value automatically registers + # all of its submodules: + assert is_registered_pickle_by_value(module_ref) + unregister_pickle_by_value(package_ref) + + # registering a submodule for pickling by value should not register + # its parent package + register_pickle_by_value(module_ref) + assert is_registered_pickle_by_value(module_ref) + assert not is_registered_pickle_by_value(package_ref) + + unregister_pickle_by_value(module_ref) + assert not is_registered_pickle_by_value(package_ref) + assert not is_registered_pickle_by_value(module_ref) -def test_register_pickle_by_value_parents_and_children(): - try: - # We should by value copy children of explicit modules, not parents - package = "foo.bar.baz" - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar") - unregister_pickle_by_value(package) - assert not result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 - - # Given a child of a pickle by value module, we should - # pickle it by value - package = "foo.bar" - register_pickle_by_value(package) - result = is_registered_pickle_by_value("foo.bar.baz") - unregister_pickle_by_value(package) - assert result - assert len(_PICKLE_BY_VALUE_MODULES) == 0 finally: _PICKLE_BY_VALUE_MODULES.clear() + def _all_types_to_test(): T = typing.TypeVar('T') From b111a56f1b6333f23b4352f36aa81c12975feacb Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:11:26 +0100 Subject: [PATCH 30/70] TST test by simulating an interactive session --- tests/cloudpickle_test.py | 162 ++++++++++++-------------------------- tests/external.py | 16 ---- tests/mock_local_file.py | 20 +++++ 3 files changed, 70 insertions(+), 128 deletions(-) delete mode 100644 tests/external.py create mode 100644 tests/mock_local_file.py diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c745e0a92..fe70fa57c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -53,8 +53,6 @@ from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname -from tests import external -from .external import inner_function from .testutils import subprocess_pickle_echo from .testutils import subprocess_pickle_string from .testutils import assert_run_python_script @@ -2356,119 +2354,59 @@ def __type__(self): o = MyClass() pickle_depickle(o, protocol=self.protocol) - def test_pickle_module_registered_for_pickling_by_value(self): + def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # noqa try: - by_ref = cloudpickle.dumps(inner_function, protocol=self.protocol) - f1 = cloudpickle.loads(by_ref) - - register_pickle_by_value("tests.external") - by_value = cloudpickle.dumps(inner_function, protocol=self.protocol) - f2 = cloudpickle.loads(by_value) - unregister_pickle_by_value("tests.external") - - # Ensure the serialisation is not the same - assert by_ref != by_value - assert len(by_value) > len(by_ref) - assert b"original_string" in by_value - assert b"original_string" not in by_ref - assert f1() == f2() - finally: - _PICKLE_BY_VALUE_MODULES.clear() - - def test_nested_function_for_pickling_by_value(self): - original_var = external.mutable_variable[0] - original_var2 = external.mutable_variable2[0] - original_func = external.inner_function - try: - by_ref = cloudpickle.dumps(external.wrapping_func, protocol=self.protocol) - - register_pickle_by_value("tests.external") - by_value = cloudpickle.dumps(external.wrapping_func, protocol=self.protocol) - - # Here we exploit the mutable variables and update it, - # to ensure that even nested dependencies are saved correctly - # This will change the data in the underlying module - external.mutable_variable[0] = "modified" - external.mutable_variable2[0] = "_suffix" - - # Importing as a non-relative package allows us to properly - # monkey-patch the function calls, rather than just - # changing a local reference to the module func - external.inner_function = lambda: "newfunc" - - # Loading by reference should pick up these local changes - # to the variables and the patch. This simulates - # module differences between the save env and load env - f_by_ref = cloudpickle.loads(by_ref) - - # Loading by value should not care about local module differences - # and should respect only the save env - f_by_val = cloudpickle.loads(by_value) - unregister_pickle_by_value("tests.external") - - # Ensure the serialisation is not the same - assert by_ref != by_value - assert len(by_value) > len(by_ref) - - # Ensure that the original content at the time - # of pickling is correct - assert b"original_string" in by_value - assert b"second_string" in by_value - assert b"original_string" not in by_ref - assert b"second_string" not in by_ref - - # Ensure even that any mutation of the module does not - # impact the behavior of the function if it was pickled by value. - assert f_by_ref() == "newfunc_suffix" - assert f_by_val() == "original_string_second_string" - finally: - external.inner_function = original_func - external.mutable_variable[0] = original_var - external.mutable_variable2[0] = original_var2 - _PICKLE_BY_VALUE_MODULES.clear() - - def test_pickle_typevar_module_by_value(self): - try: - import _cloudpickle_testpkg - T = _cloudpickle_testpkg.T - by_ref = cloudpickle.dumps(T, protocol=self.protocol) - f1 = cloudpickle.loads(by_ref) - - register_pickle_by_value(_cloudpickle_testpkg) - by_value = cloudpickle.dumps(T, protocol=self.protocol) - f2 = cloudpickle.loads(by_value) - unregister_pickle_by_value(_cloudpickle_testpkg) - - # Ensure the serialisation is not the same - assert by_ref != by_value - assert b"typevar" in by_value - assert b"typevar" not in by_ref - assert b"getattr" in by_ref - assert b"getattr" not in by_value - assert f1 == f2 - finally: - _PICKLE_BY_VALUE_MODULES.clear() - - def test_pickle_entire_module_by_value(self): - import _cloudpickle_testpkg as m + # We simulate an interactive session that: + # - relies on locally-importable constructs defined in .py files + # located in the directory from which the session was launched. + # - uses these constructs in remote workers that do not have + # access to the files in which the constructs were defined: this + # situation is the justification behind the + # (un)register_pickle_by_value(module) api that cloudpickle + # exposes. + + # Simulate an interactive session started from + # /path/to/cloudpickle/tests. + _mock_interactive_session_cwd = os.path.dirname(__file__) + if _mock_interactive_session_cwd not in sys.path: + sys.path.insert(0, _mock_interactive_session_cwd) + + # The constructs whose pickling mechanism is changed using + # register_pickle_by_value are functions, classes, TypeVar and + # modules. + import mock_local_file as mod + from mock_local_file import local_function, LocalT, LocalClass + with subprocess_worker(protocol=self.protocol) as w: + # make the module unavailable in the remote worker + w.run( + lambda p: sys.path.remove(p), _mock_interactive_session_cwd + ) + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_file")) + + for o in [mod, local_function, LocalT, LocalClass]: + with pytest.raises(ImportError): + w.run(lambda: o) + + register_pickle_by_value("mock_local_file") + # function + assert w.run(lambda: local_function()) == local_function() + # typevar + assert w.run(lambda: LocalT.__name__) == LocalT.__name__ + # classes + assert w.run( + lambda: LocalClass().method() == LocalClass().method() + ) + # modules + assert ( + w.run(lambda: mod).local_function() == local_function() + ) - try: - by_ref = cloudpickle.dumps(m, protocol=self.protocol) - m1 = cloudpickle.loads(by_ref) - - register_pickle_by_value("_cloudpickle_testpkg") - by_value = cloudpickle.dumps(m, protocol=self.protocol) - m2 = cloudpickle.loads(by_value) - unregister_pickle_by_value("_cloudpickle_testpkg") - - # Ensure the serialisation is not the same - assert by_ref != by_value - assert len(by_value) > len(by_ref) - assert b"hello from a package" in by_value - assert b"hello from a package" not in by_ref - assert m1.package_function() == m2.package_function() finally: - _PICKLE_BY_VALUE_MODULES.clear() + if _mock_interactive_session_cwd in sys.path: + sys.path.remove(_mock_interactive_session_cwd) + if is_registered_pickle_by_value("mock_local_file"): + unregister_pickle_by_value("mock_local_file") @pytest.mark.skipif( sys.version_info < (3, 7), diff --git a/tests/external.py b/tests/external.py deleted file mode 100644 index ffe841d5c..000000000 --- a/tests/external.py +++ /dev/null @@ -1,16 +0,0 @@ -# simulate an external function which cloudpickle would normally -# only save a reference to -mutable_variable = ["original_string"] -mutable_variable2 = ["_second_string"] - - -def inner_function(): - return mutable_variable[0] - - -def wrapping_func(): - from .external_two import function_from_external_two - return ( - inner_function() + mutable_variable2[0] + - function_from_external_two() - ) diff --git a/tests/mock_local_file.py b/tests/mock_local_file.py new file mode 100644 index 000000000..1a1c1da46 --- /dev/null +++ b/tests/mock_local_file.py @@ -0,0 +1,20 @@ +""" +In the distributed computing setting, this file plays the role of a "local +development" file, e.g. a file that is importable locally, but unimportable in +remote workers. Constructs defined in this file and usually pickled by +reference should instead flagged to cloudpickle for pickling by value: this is +done using the register_pickle_by_value api exposed by cloudpickle. +""" +import typing + + +def local_function(): + return "hello from a function importable locally!" + + +class LocalClass: + def method(self): + return "hello from a class importable locally" + + +LocalT = typing.TypeVar("LocalT") From 4a3b3dd44197d750f9f15b06bee184d8dba179c7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:20:34 +0100 Subject: [PATCH 31/70] CI debug sys.path issues in CI --- tests/cloudpickle_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index fe70fa57c..d641744d5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2381,6 +2381,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) + raise ValueError(w.run(lambda p: sys.path)) with pytest.raises(ImportError): w.run(lambda: __import__("mock_local_file")) From ad85452e832ae89db7863fc28d2e1980cff4e559 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:22:05 +0100 Subject: [PATCH 32/70] CI again --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d641744d5..c65676bb4 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2381,7 +2381,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) - raise ValueError(w.run(lambda p: sys.path)) + raise ValueError(w.run(lambda: sys.path)) with pytest.raises(ImportError): w.run(lambda: __import__("mock_local_file")) From 4f99ad11ec117ff2145d1d37cdf0c39e2fed1a31 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:25:55 +0100 Subject: [PATCH 33/70] CI again --- tests/cloudpickle_test.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c65676bb4..59ad2e48d 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2381,10 +2381,22 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) - raise ValueError(w.run(lambda: sys.path)) with pytest.raises(ImportError): w.run(lambda: __import__("mock_local_file")) + for o in [mod, local_function, LocalT, LocalClass]: + imported_objs = [] + try: + w.run(lambda: o) + except ImportError: + continue + else: + imported_objs.append(o) + __import__('pdb').set_trace() + + if imported_objs != []: + raise ValueError(imported_objs) + for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): w.run(lambda: o) From 59a0903b15fdaf8d3fae123034d525e8d42cdc34 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:27:56 +0100 Subject: [PATCH 34/70] CI again --- tests/cloudpickle_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 59ad2e48d..395356dc4 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2381,14 +2381,14 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) - with pytest.raises(ImportError): - w.run(lambda: __import__("mock_local_file")) + # with pytest.raises(ImportError): + # w.run(lambda: __import__("mock_local_file")) for o in [mod, local_function, LocalT, LocalClass]: imported_objs = [] try: w.run(lambda: o) - except ImportError: + except Exception: continue else: imported_objs.append(o) From 0bf7c58e9bceb183d01f63ccb379426d7926ad9d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:28:31 +0100 Subject: [PATCH 35/70] CI again --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 395356dc4..357ee9dc3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2384,8 +2384,8 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # with pytest.raises(ImportError): # w.run(lambda: __import__("mock_local_file")) + imported_objs = [] for o in [mod, local_function, LocalT, LocalClass]: - imported_objs = [] try: w.run(lambda: o) except Exception: From 43210dc32fec846c5b2455013a1f1a656a871d91 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:30:01 +0100 Subject: [PATCH 36/70] CI again --- tests/cloudpickle_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 357ee9dc3..83f81a0eb 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2392,7 +2392,6 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): continue else: imported_objs.append(o) - __import__('pdb').set_trace() if imported_objs != []: raise ValueError(imported_objs) From 2fdd912e35d6743c19cdb593a9717af80bdb9981 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:34:07 +0100 Subject: [PATCH 37/70] CI again --- tests/cloudpickle_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 83f81a0eb..e2f2bce1e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2393,8 +2393,13 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): else: imported_objs.append(o) - if imported_objs != []: - raise ValueError(imported_objs) + if imported_objs != [] or True: + raise ValueError( + [*imported_objs, + w.run(lambda: __import__("mock_local_file").__path__), + w.run(lambda: __import__("sys").path) + ] + ) for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): From 03c82cb22e59f05c3cf0394346cc6da68b3207bd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:36:02 +0100 Subject: [PATCH 38/70] CI again --- tests/cloudpickle_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index e2f2bce1e..4d390ac2a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2375,6 +2375,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # register_pickle_by_value are functions, classes, TypeVar and # modules. import mock_local_file as mod + raise ValueError(cloudpickle.dumps(mod)) from mock_local_file import local_function, LocalT, LocalClass with subprocess_worker(protocol=self.protocol) as w: # make the module unavailable in the remote worker From 730c11b3226671ba242b5a77800ca87f2805c05e Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:38:39 +0100 Subject: [PATCH 39/70] CI again --- tests/cloudpickle_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4d390ac2a..55f8de6ac 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2375,7 +2375,6 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # register_pickle_by_value are functions, classes, TypeVar and # modules. import mock_local_file as mod - raise ValueError(cloudpickle.dumps(mod)) from mock_local_file import local_function, LocalT, LocalClass with subprocess_worker(protocol=self.protocol) as w: # make the module unavailable in the remote worker @@ -2397,7 +2396,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): if imported_objs != [] or True: raise ValueError( [*imported_objs, - w.run(lambda: __import__("mock_local_file").__path__), + w.run(lambda: __import__("mock_local_file").__file__), w.run(lambda: __import__("sys").path) ] ) From 1d97bff66fee94efd9a08b2e47dc981dc7917111 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:40:56 +0100 Subject: [PATCH 40/70] CI again --- tests/cloudpickle_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 55f8de6ac..f9e2278ed 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2397,6 +2397,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): raise ValueError( [*imported_objs, w.run(lambda: __import__("mock_local_file").__file__), + w.run(lambda: __import__("mock_local_file").__spec__), w.run(lambda: __import__("sys").path) ] ) From aa12ac28c3ba6987e9abc861821de5cc1baee294 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:47:06 +0100 Subject: [PATCH 41/70] CI look only at macos ci builds --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 6566800e8..048281bba 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -28,7 +28,7 @@ jobs: build: strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [macos-latest] python_version: [3.6, 3.7, 3.8, 3.9, "3.10-dev", "pypy3"] exclude: # Do not test all minor versions on all platforms, especially if they From 93544c5e16546eb09d35b442a395d2bd267d8adf Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Jul 2021 00:50:19 +0100 Subject: [PATCH 42/70] CI again --- tests/cloudpickle_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f9e2278ed..6911f7d0a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2381,6 +2381,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) + # START OF CI DEBUGGING STATEMENTS # with pytest.raises(ImportError): # w.run(lambda: __import__("mock_local_file")) @@ -2393,7 +2394,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): else: imported_objs.append(o) - if imported_objs != [] or True: + if imported_objs != []: raise ValueError( [*imported_objs, w.run(lambda: __import__("mock_local_file").__file__), @@ -2401,6 +2402,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run(lambda: __import__("sys").path) ] ) + # END OF CI DEBUGGING STATEMENTS for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): From 2d3d1ed41f69ba865a6dfe4f4c8a1643e776e4db Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Jul 2021 00:25:45 +0100 Subject: [PATCH 43/70] TST: fix isolation procedure on linux --- tests/cloudpickle_test.py | 81 ++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6911f7d0a..581f8a16c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -86,6 +86,14 @@ def _escape(raw_filepath): return raw_filepath.replace("\\", r"\\\\") +def _maybe_remove(list_, item): + try: + list_.remove(item) + except ValueError: + pass + return list_ + + def test_extract_class_dict(): class A(int): """A docstring""" @@ -2355,54 +2363,47 @@ def __type__(self): pickle_depickle(o, protocol=self.protocol) def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # noqa + _prev_sys_path = sys.path.copy() try: # We simulate an interactive session that: - # - relies on locally-importable constructs defined in .py files - # located in the directory from which the session was launched. - # - uses these constructs in remote workers that do not have - # access to the files in which the constructs were defined: this - # situation is the justification behind the + # - we start from the /path/to/cloudpickle/tests directory, where a + # local .py file (mock_local_file) is located. + # - uses constructs from mock_local_file in remote workers that do + # not have access to this file. This situation is + # the justification behind the # (un)register_pickle_by_value(module) api that cloudpickle # exposes. - # Simulate an interactive session started from - # /path/to/cloudpickle/tests. + # First, remove sys.path entries that could point to + # /path/to/cloudpickle/tests and be in inherited by the worker + _maybe_remove(sys.path, '') + _maybe_remove(sys.path, os.getcwd()) + + # Add the desired session working directory _mock_interactive_session_cwd = os.path.dirname(__file__) - if _mock_interactive_session_cwd not in sys.path: - sys.path.insert(0, _mock_interactive_session_cwd) - - # The constructs whose pickling mechanism is changed using - # register_pickle_by_value are functions, classes, TypeVar and - # modules. - import mock_local_file as mod - from mock_local_file import local_function, LocalT, LocalClass + sys.path.insert(0, _mock_interactive_session_cwd) + with subprocess_worker(protocol=self.protocol) as w: - # make the module unavailable in the remote worker + # Make the module unavailable in the remote worker w.run( lambda p: sys.path.remove(p), _mock_interactive_session_cwd ) - # START OF CI DEBUGGING STATEMENTS - # with pytest.raises(ImportError): - # w.run(lambda: __import__("mock_local_file")) - - imported_objs = [] - for o in [mod, local_function, LocalT, LocalClass]: - try: - w.run(lambda: o) - except Exception: - continue - else: - imported_objs.append(o) - - if imported_objs != []: - raise ValueError( - [*imported_objs, - w.run(lambda: __import__("mock_local_file").__file__), - w.run(lambda: __import__("mock_local_file").__spec__), - w.run(lambda: __import__("sys").path) - ] - ) - # END OF CI DEBUGGING STATEMENTS + # Import the actual file after starting the module since the + # worker is started using fork on Linux, which will inherits + # the parent sys.modules. On Python>3.6, the worker can be + # started using spawn using mp_context in ProcessPoolExectutor. + # TODO Once Python 3.6 reaches end of life, rely on mp_context + # instead. + import mock_local_file as mod + # The constructs whose pickling mechanism is changed using + # register_pickle_by_value are functions, classes, TypeVar and + # modules. + from mock_local_file import local_function, LocalT, LocalClass + + # Make sure the mdodule/constructs are unimportable in the + # worker. + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_file")) for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): @@ -2423,8 +2424,8 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): ) finally: - if _mock_interactive_session_cwd in sys.path: - sys.path.remove(_mock_interactive_session_cwd) + sys.path = _prev_sys_path + sys.modules.pop("mock_local_file", None) if is_registered_pickle_by_value("mock_local_file"): unregister_pickle_by_value("mock_local_file") From fd740f20ab68bf46248ab738eb6d4459bd1e477e Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Jul 2021 00:26:20 +0100 Subject: [PATCH 44/70] CI restore testing for all OSes --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 048281bba..6566800e8 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -28,7 +28,7 @@ jobs: build: strategy: matrix: - os: [macos-latest] + os: [ubuntu-latest, windows-latest, macos-latest] python_version: [3.6, 3.7, 3.8, 3.9, "3.10-dev", "pypy3"] exclude: # Do not test all minor versions on all platforms, especially if they From 1794c2e5c93c8db611b84542962b0804cc7934e2 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Jul 2021 00:35:12 +0100 Subject: [PATCH 45/70] TST take into account possisble PYTHONPATH values --- tests/cloudpickle_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 581f8a16c..2502d1fd3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2373,14 +2373,14 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # the justification behind the # (un)register_pickle_by_value(module) api that cloudpickle # exposes. + _mock_interactive_session_cwd = os.path.dirname(__file__) # First, remove sys.path entries that could point to # /path/to/cloudpickle/tests and be in inherited by the worker _maybe_remove(sys.path, '') - _maybe_remove(sys.path, os.getcwd()) + _maybe_remove(sys.path, _mock_interactive_session_cwd) # Add the desired session working directory - _mock_interactive_session_cwd = os.path.dirname(__file__) sys.path.insert(0, _mock_interactive_session_cwd) with subprocess_worker(protocol=self.protocol) as w: From a6637d566b32f6ef7e2b7004dd37edee2f0ba9c9 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Jul 2021 01:10:38 +0100 Subject: [PATCH 46/70] API replace is_registered_... by list_registry --- cloudpickle/cloudpickle.py | 13 +++-- tests/cloudpickle_test.py | 48 +++++++++---------- .../mod.py} | 0 3 files changed, 32 insertions(+), 29 deletions(-) rename tests/{mock_local_file.py => mock_local_folder/mod.py} (100%) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 730772a12..9f99430c4 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -140,7 +140,11 @@ def unregister_pickle_by_value(module): _PICKLE_BY_VALUE_MODULES.remove(module_name) -def is_registered_pickle_by_value(module): +def list_registry_pickle_by_value(): + return _PICKLE_BY_VALUE_MODULES.copy() + + +def _is_registered_pickle_by_value(module): module_name = module.__name__ if inspect.ismodule(module) else module if module_name in _PICKLE_BY_VALUE_MODULES: return True @@ -153,6 +157,7 @@ def is_registered_pickle_by_value(module): module_name = parent_name return False + def _whichmodule(obj, name): """Find the module an object belongs to. @@ -208,7 +213,7 @@ def _should_pickle_by_reference(obj, name=None): if module_name is None: return False module, name = module_name - return not is_registered_pickle_by_value(module.__name__) + return not _is_registered_pickle_by_value(module.__name__) elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for @@ -216,7 +221,7 @@ def _should_pickle_by_reference(obj, name=None): # is sys.modules therefore a cheap and simple heuristic to tell us # whether we can assume that a given module could be imported by name # in another Python process. - if is_registered_pickle_by_value(obj.__name__): + if _is_registered_pickle_by_value(obj.__name__): return False return obj.__name__ in sys.modules else: @@ -881,7 +886,7 @@ def _typevar_reduce(obj): if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) - elif is_registered_pickle_by_value(module_and_name[0].__name__): + elif _is_registered_pickle_by_value(module_and_name[0].__name__): return (_make_typevar, _decompose_typevar(obj)) return (getattr, module_and_name) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2502d1fd3..2e293c094 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -47,7 +47,8 @@ from cloudpickle.compat import pickle from cloudpickle import register_pickle_by_value from cloudpickle import unregister_pickle_by_value -from cloudpickle import is_registered_pickle_by_value +from cloudpickle import list_registry_pickle_by_value +from cloudpickle.cloudpickle import _is_registered_pickle_by_value from cloudpickle.cloudpickle import _should_pickle_by_reference, _PICKLE_BY_VALUE_MODULES from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule @@ -2394,22 +2395,24 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # started using spawn using mp_context in ProcessPoolExectutor. # TODO Once Python 3.6 reaches end of life, rely on mp_context # instead. - import mock_local_file as mod + import mock_local_folder.mod as mod # The constructs whose pickling mechanism is changed using # register_pickle_by_value are functions, classes, TypeVar and # modules. - from mock_local_file import local_function, LocalT, LocalClass + from mock_local_folder.mod import ( + local_function, LocalT, LocalClass + ) - # Make sure the mdodule/constructs are unimportable in the + # Make sure the module/constructs are unimportable in the # worker. with pytest.raises(ImportError): - w.run(lambda: __import__("mock_local_file")) + w.run(lambda: __import__("mock_local_folder.mod")) for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): w.run(lambda: o) - register_pickle_by_value("mock_local_file") + register_pickle_by_value("mock_local_folder.mod") # function assert w.run(lambda: local_function()) == local_function() # typevar @@ -2425,9 +2428,10 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): finally: sys.path = _prev_sys_path - sys.modules.pop("mock_local_file", None) - if is_registered_pickle_by_value("mock_local_file"): - unregister_pickle_by_value("mock_local_file") + for m in ["mock_local_folder", "mock_local_folder.mod"]: + sys.modules.pop(m, None) + if m in list_registry_pickle_by_value(): + unregister_pickle_by_value(m) @pytest.mark.skipif( sys.version_info < (3, 7), @@ -2490,35 +2494,29 @@ def test_lookup_module_and_qualname_stdlib_typevar(): def test_register_pickle_by_value(use_module_name): import _cloudpickle_testpkg + package_name = "_cloudpickle_testpkg" + module_name = "_cloudpickle_testpkg.mod" if use_module_name: - package_ref = "_cloudpickle_testpkg" - module_ref = "_cloudpickle_testpkg.mod" + package_ref = package_name + module_ref = module_name else: package_ref = _cloudpickle_testpkg module_ref = _cloudpickle_testpkg.mod try: - assert not is_registered_pickle_by_value(package_ref) - assert not is_registered_pickle_by_value(module_ref) + assert list_registry_pickle_by_value() == set() register_pickle_by_value(package_ref) - assert is_registered_pickle_by_value(package_ref) - - # registering a package for pickling by value automatically registers - # all of its submodules: - assert is_registered_pickle_by_value(module_ref) - unregister_pickle_by_value(package_ref) + assert list_registry_pickle_by_value() == {package_name} - # registering a submodule for pickling by value should not register - # its parent package register_pickle_by_value(module_ref) - assert is_registered_pickle_by_value(module_ref) - assert not is_registered_pickle_by_value(package_ref) + assert list_registry_pickle_by_value() == {package_name, module_name} unregister_pickle_by_value(module_ref) - assert not is_registered_pickle_by_value(package_ref) - assert not is_registered_pickle_by_value(module_ref) + assert list_registry_pickle_by_value() == {package_name} + unregister_pickle_by_value(package_ref) + assert list_registry_pickle_by_value() == set() finally: _PICKLE_BY_VALUE_MODULES.clear() diff --git a/tests/mock_local_file.py b/tests/mock_local_folder/mod.py similarity index 100% rename from tests/mock_local_file.py rename to tests/mock_local_folder/mod.py From 866f09666d50b683e63a4e8fefc89d26086d44ba Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Jul 2021 01:19:30 +0100 Subject: [PATCH 47/70] CLN remove unused import --- tests/cloudpickle_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2e293c094..f2cc71e97 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -48,7 +48,6 @@ from cloudpickle import register_pickle_by_value from cloudpickle import unregister_pickle_by_value from cloudpickle import list_registry_pickle_by_value -from cloudpickle.cloudpickle import _is_registered_pickle_by_value from cloudpickle.cloudpickle import _should_pickle_by_reference, _PICKLE_BY_VALUE_MODULES from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule From 1206827d1a5cc8d69f0f73a875b60739e0f5298a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 21 Jul 2021 23:26:47 +0100 Subject: [PATCH 48/70] TST add tests invoving namespace modules and subfolders --- tests/cloudpickle_test.py | 77 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 76 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f2cc71e97..9e8e2fd06 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2406,6 +2406,10 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # worker. with pytest.raises(ImportError): w.run(lambda: __import__("mock_local_folder.mod")) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) for o in [mod, local_function, LocalT, LocalClass]: with pytest.raises(ImportError): @@ -2425,9 +2429,80 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run(lambda: mod).local_function() == local_function() ) + # Constructs from modules inside subfolders should be pickled + # by value if a namespace module pointing to some parent folder + # was registered for pickling by value. A "mock_local_folder" + # namespace module falls into that category, but a + # "mock_local_folder.mod" one does not. + from mock_local_folder.subfolder.submod import ( + LocalSubmodClass, LocalSubmodT, local_submod_function + ) + # Shorter aliases to comply with line-length limits + _t, _func, _class = ( + LocalSubmodT, local_submod_function, LocalSubmodClass + ) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + unregister_pickle_by_value("mock_local_folder.mod") + + with pytest.raises(ImportError): + w.run(lambda: local_function) + + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + # Test the namespace folder case + register_pickle_by_value("mock_local_folder") + assert w.run(lambda: local_function()) == local_function() + assert w.run(lambda: _func()) == _func() + unregister_pickle_by_value("mock_local_folder") + + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the case of registering a single module inside a + # subfolder. + register_pickle_by_value("mock_local_folder.subfolder.submod") + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + # Registering a module from a subfolder for pickling by value + # should not make constructs from modules from the parent + # folder pickleable + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + unregister_pickle_by_value( + "mock_local_folder.subfolder.submod" + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the subfolder namespace module case + register_pickle_by_value("mock_local_folder.subfolder") + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + unregister_pickle_by_value("mock_local_folder.subfolder") + # TODO: add a test making sure that different version of the + # same function (with possibly mutated globals) can peacefully + # co-exist in the worker. finally: + _fname = "mock_local_folder" sys.path = _prev_sys_path - for m in ["mock_local_folder", "mock_local_folder.mod"]: + for m in [_fname, f"{_fname}.mod", f"{_fname}.subfolder", + f"{_fname}.subfolder.submod"]: sys.modules.pop(m, None) if m in list_registry_pickle_by_value(): unregister_pickle_by_value(m) From 4ee37d91f82528c487ea6a7ad3cfe5997172761d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 00:06:37 +0100 Subject: [PATCH 49/70] TST test pickling by value installed packages --- tests/cloudpickle_test.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 9e8e2fd06..152c2e49f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2507,6 +2507,34 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): if m in list_registry_pickle_by_value(): unregister_pickle_by_value(m) + def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_value( # noqa + self + ): + for package_or_module in ["package", "modue"]: + if package_or_module == "package": + m = __import__("_cloudpickle_testpkg") + f = m.package_function_with_global + _original_global = m.global_variable + elif package_or_module == "module": + m = __import__("_cloudpickle_testpkg.mod") + f = m.module_function_with_global + _original_global = m.global_variable + try: + with subprocess_worker(protocol=self.protocol) as w: + assert w.run(lambda: f()) == _original_global + + # Test that f is pickled by value by modifying a global + # variable that f uses, and making sure that this + # modification shows up when calling the function remotely + register_pickle_by_value(m.__name__) + assert w.run(lambda: f()) == _original_global + m.global_variable = "modified global" + assert w.run(lambda: f()) == "modified global" + unregister_pickle_by_value(m.__name__) + finally: + m.global_variable = _original_global + if m.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(m.__name__) @pytest.mark.skipif( sys.version_info < (3, 7), reason="Determinism can only be guaranteed for Python 3.7+" From 4faafbc1c5a39cf0774d235854a4e32f6f561626 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 00:11:43 +0100 Subject: [PATCH 50/70] TST add module inside locally importable subfolder --- tests/mock_local_folder/subfolder/submod.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/mock_local_folder/subfolder/submod.py diff --git a/tests/mock_local_folder/subfolder/submod.py b/tests/mock_local_folder/subfolder/submod.py new file mode 100644 index 000000000..deebc1477 --- /dev/null +++ b/tests/mock_local_folder/subfolder/submod.py @@ -0,0 +1,13 @@ +import typing + + +def local_submod_function(): + return "hello from a file located in a locally-importable subfolder!" + + +class LocalSubmodClass: + def method(self): + return "hello from a class located in a locally-importable subfolder!" + + +LocalSubmodT = typing.TypeVar("LocalSubmodT") From dae05bb4e96a8e7f3f9783edf8109e945a2b39c7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 00:15:25 +0100 Subject: [PATCH 51/70] TST add funcs with globals in _cloudpickle_testpkg --- .../cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py | 8 ++++++++ tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py index 595243c26..ea7aeb9e9 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -7,6 +7,14 @@ def package_function(): return "hello from a package!" +global_variable = "some global variable" + + +def package_function_with_global(): + global global_variable + return global_variable + + class _SingletonClass(object): def __reduce__(self): # This reducer is only valid for the top level "some_singleton" object. diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py index 02b144c30..e8225a4ca 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -63,3 +63,11 @@ def f(x): def module_function(): return "hello from a module!" + + +global_variable = "some global variable" + + +def module_function_with_global(): + global global_variable + return global_variable From 3d4c896c3fb58b3b8434f0820816177e631c0e48 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 00:20:30 +0100 Subject: [PATCH 52/70] TST remote co-existence of multiple versions of a func --- tests/cloudpickle_test.py | 48 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 152c2e49f..d5ebe3867 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2495,9 +2495,6 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): assert w.run(lambda: _class().method()) == _class().method() unregister_pickle_by_value("mock_local_folder.subfolder") - # TODO: add a test making sure that different version of the - # same function (with possibly mutated globals) can peacefully - # co-exist in the worker. finally: _fname = "mock_local_folder" sys.path = _prev_sys_path @@ -2535,6 +2532,51 @@ def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_va m.global_variable = _original_global if m.__name__ in list_registry_pickle_by_value(): unregister_pickle_by_value(m.__name__) + + def test_pickle_various_versions_of_the_same_function_with_different_pickling_method( # noqa + self + ): + # Make sure that different version of the same function (possibly + # pickled in a different way - by value and/or by reference) can + # peacefully co-exist (e.g. without globals interaction) in a remote + # worker. + import _cloudpickle_testpkg + from _cloudpickle_testpkg import package_function_with_global as f + _original_global = _cloudpickle_testpkg.global_variable + + def _create_registry(): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry = {} + # global _cloudpickle_registry + + def _add_to_registry(v, k): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry[k] = v + + def _call_from_registry(k): + _main = __import__("sys").modules["__main__"] + return _main._cloudpickle_registry[k]() + + try: + with subprocess_worker(protocol=self.protocol) as w: + w.run(_create_registry) + w.run(_add_to_registry, f, "f_by_ref") + + register_pickle_by_value("_cloudpickle_testpkg") + w.run(_add_to_registry, f, "f_by_val") + assert ( + w.run(_call_from_registry, "f_by_ref") == _original_global + ) + assert ( + w.run(_call_from_registry, "f_by_val") == "modified global" + ) + + finally: + _cloudpickle_testpkg.global_variable = _original_global + + if "_cloudpickle_testpkg" in list_registry_pickle_by_value(): + unregister_pickle_by_value("_cloudpickle_testpkg") + @pytest.mark.skipif( sys.version_info < (3, 7), reason="Determinism can only be guaranteed for Python 3.7+" From 4f5942e0079e6ce57791698eb56dc22b7c4c176d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 00:25:09 +0100 Subject: [PATCH 53/70] TST, FIX some crucial line dissapearing --- tests/cloudpickle_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d5ebe3867..a315da2ed 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2563,6 +2563,7 @@ def _call_from_registry(k): w.run(_add_to_registry, f, "f_by_ref") register_pickle_by_value("_cloudpickle_testpkg") + _cloudpickle_testpkg.global_variable = "modified global" w.run(_add_to_registry, f, "f_by_val") assert ( w.run(_call_from_registry, "f_by_ref") == _original_global From c50aa4cbda2182c0ea73d66c6795db3c55269876 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 20:50:34 +0100 Subject: [PATCH 54/70] API enforce module-type input for registration api --- cloudpickle/cloudpickle.py | 28 ++++++++++-- tests/cloudpickle_test.py | 91 +++++++++++++++++++++----------------- 2 files changed, 75 insertions(+), 44 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 9f99430c4..27b2c08b0 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -130,14 +130,34 @@ def _lookup_class_or_track(class_tracker_id, class_def): def register_pickle_by_value(module): """Register that the input module should be pickled by value.""" - module_name = module.__name__ if inspect.ismodule(module) else module - _PICKLE_BY_VALUE_MODULES.add(module_name) + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {type(module)} instead" + ) + # cloudpickle needs a way to access any module registered for pickling by + # value when introspecting relative imports inside functions pickled by + # value. Access is ensured by checking that module is present in + # sys.modules at registering time and assuming that it will still be in + # there when accessed during pickling. Another alternative would be to + # store a weakref to the module. + if module.__name__ not in sys.modules: + raise ValueError( + f"{module} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + _PICKLE_BY_VALUE_MODULES.add(module.__name__) def unregister_pickle_by_value(module): """Unregister that the input module should be pickled by value.""" - module_name = module.__name__ if inspect.ismodule(module) else module - _PICKLE_BY_VALUE_MODULES.remove(module_name) + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {type(module)} instead" + ) + if module.__name__ not in _PICKLE_BY_VALUE_MODULES: + raise ValueError(f"{module} is not registered for pickle by value") + else: + _PICKLE_BY_VALUE_MODULES.remove(module.__name__) def list_registry_pickle_by_value(): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a315da2ed..4566cbfc5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -48,7 +48,7 @@ from cloudpickle import register_pickle_by_value from cloudpickle import unregister_pickle_by_value from cloudpickle import list_registry_pickle_by_value -from cloudpickle.cloudpickle import _should_pickle_by_reference, _PICKLE_BY_VALUE_MODULES +from cloudpickle.cloudpickle import _should_pickle_by_reference from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -2415,7 +2415,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): with pytest.raises(ImportError): w.run(lambda: o) - register_pickle_by_value("mock_local_folder.mod") + register_pickle_by_value(mod) # function assert w.run(lambda: local_function()) == local_function() # typevar @@ -2448,7 +2448,7 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): with pytest.raises(ImportError): w.run(lambda: local_submod_function) - unregister_pickle_by_value("mock_local_folder.mod") + unregister_pickle_by_value(mod) with pytest.raises(ImportError): w.run(lambda: local_function) @@ -2457,10 +2457,11 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run(lambda: __import__("mock_local_folder.mod")) # Test the namespace folder case - register_pickle_by_value("mock_local_folder") + import mock_local_folder + register_pickle_by_value(mock_local_folder) assert w.run(lambda: local_function()) == local_function() assert w.run(lambda: _func()) == _func() - unregister_pickle_by_value("mock_local_folder") + unregister_pickle_by_value(mock_local_folder) with pytest.raises(ImportError): w.run(lambda: local_function) @@ -2469,7 +2470,8 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # Test the case of registering a single module inside a # subfolder. - register_pickle_by_value("mock_local_folder.subfolder.submod") + import mock_local_folder.subfolder.submod + register_pickle_by_value(mock_local_folder.subfolder.submod) assert w.run(lambda: _func()) == _func() assert w.run(lambda: _t.__name__) == _t.__name__ assert w.run(lambda: _class().method()) == _class().method() @@ -2483,26 +2485,27 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): w.run(lambda: __import__("mock_local_folder.mod")) unregister_pickle_by_value( - "mock_local_folder.subfolder.submod" + mock_local_folder.subfolder.submod ) with pytest.raises(ImportError): w.run(lambda: local_submod_function) # Test the subfolder namespace module case - register_pickle_by_value("mock_local_folder.subfolder") + import mock_local_folder.subfolder + register_pickle_by_value(mock_local_folder.subfolder) assert w.run(lambda: _func()) == _func() assert w.run(lambda: _t.__name__) == _t.__name__ assert w.run(lambda: _class().method()) == _class().method() - unregister_pickle_by_value("mock_local_folder.subfolder") + unregister_pickle_by_value(mock_local_folder.subfolder) finally: _fname = "mock_local_folder" sys.path = _prev_sys_path for m in [_fname, f"{_fname}.mod", f"{_fname}.subfolder", f"{_fname}.subfolder.submod"]: - sys.modules.pop(m, None) - if m in list_registry_pickle_by_value(): - unregister_pickle_by_value(m) + mod = sys.modules.pop(m, None) + if mod and mod.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(mod) def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_value( # noqa self @@ -2523,15 +2526,15 @@ def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_va # Test that f is pickled by value by modifying a global # variable that f uses, and making sure that this # modification shows up when calling the function remotely - register_pickle_by_value(m.__name__) + register_pickle_by_value(m) assert w.run(lambda: f()) == _original_global m.global_variable = "modified global" assert w.run(lambda: f()) == "modified global" - unregister_pickle_by_value(m.__name__) + unregister_pickle_by_value(m) finally: m.global_variable = _original_global if m.__name__ in list_registry_pickle_by_value(): - unregister_pickle_by_value(m.__name__) + unregister_pickle_by_value(m) def test_pickle_various_versions_of_the_same_function_with_different_pickling_method( # noqa self @@ -2562,7 +2565,7 @@ def _call_from_registry(k): w.run(_create_registry) w.run(_add_to_registry, f, "f_by_ref") - register_pickle_by_value("_cloudpickle_testpkg") + register_pickle_by_value(_cloudpickle_testpkg) _cloudpickle_testpkg.global_variable = "modified global" w.run(_add_to_registry, f, "f_by_val") assert ( @@ -2576,7 +2579,7 @@ def _call_from_registry(k): _cloudpickle_testpkg.global_variable = _original_global if "_cloudpickle_testpkg" in list_registry_pickle_by_value(): - unregister_pickle_by_value("_cloudpickle_testpkg") + unregister_pickle_by_value(_cloudpickle_testpkg) @pytest.mark.skipif( sys.version_info < (3, 7), @@ -2635,35 +2638,43 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert name == 'AnyStr' -@pytest.mark.parametrize("use_module_name", [True, False]) -def test_register_pickle_by_value(use_module_name): - import _cloudpickle_testpkg +def test_register_pickle_by_value(): + import _cloudpickle_testpkg as pkg + import _cloudpickle_testpkg.mod as mod - package_name = "_cloudpickle_testpkg" - module_name = "_cloudpickle_testpkg.mod" - if use_module_name: - package_ref = package_name - module_ref = module_name - else: - package_ref = _cloudpickle_testpkg - module_ref = _cloudpickle_testpkg.mod + assert list_registry_pickle_by_value() == set() - try: - assert list_registry_pickle_by_value() == set() + register_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == {pkg.__name__} - register_pickle_by_value(package_ref) - assert list_registry_pickle_by_value() == {package_name} + register_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__, mod.__name__} - register_pickle_by_value(module_ref) - assert list_registry_pickle_by_value() == {package_name, module_name} + unregister_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__} - unregister_pickle_by_value(module_ref) - assert list_registry_pickle_by_value() == {package_name} + msg = f"Input should be a module object, got {str} instead" + with pytest.raises(ValueError, match=msg): + unregister_pickle_by_value(pkg.__name__) - unregister_pickle_by_value(package_ref) - assert list_registry_pickle_by_value() == set() - finally: - _PICKLE_BY_VALUE_MODULES.clear() + unregister_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == set() + + msg = f"{pkg} is not registered for pickle by value" + with pytest.raises(ValueError, match=msg): + unregister_pickle_by_value(pkg) + + msg = f"Input should be a module object, got {str} instead" + with pytest.raises(ValueError, match=msg): + register_pickle_by_value(pkg.__name__) + + dynamic_mod = types.ModuleType('dynamic_mod') + msg = ( + f"{dynamic_mod} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + with pytest.raises(ValueError, match=msg): + register_pickle_by_value(dynamic_mod) def _all_types_to_test(): From 584320564b9c34cee380745368347e119370d991 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 20:56:18 +0100 Subject: [PATCH 55/70] TST (try to) escape backslashes on windows --- tests/cloudpickle_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4566cbfc5..942551e85 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2653,25 +2653,25 @@ def test_register_pickle_by_value(): unregister_pickle_by_value(mod) assert list_registry_pickle_by_value() == {pkg.__name__} - msg = f"Input should be a module object, got {str} instead" + msg = fr"Input should be a module object, got {str} instead" with pytest.raises(ValueError, match=msg): unregister_pickle_by_value(pkg.__name__) unregister_pickle_by_value(pkg) assert list_registry_pickle_by_value() == set() - msg = f"{pkg} is not registered for pickle by value" + msg = fr"{pkg} is not registered for pickle by value" with pytest.raises(ValueError, match=msg): unregister_pickle_by_value(pkg) - msg = f"Input should be a module object, got {str} instead" + msg = fr"Input should be a module object, got {str} instead" with pytest.raises(ValueError, match=msg): register_pickle_by_value(pkg.__name__) dynamic_mod = types.ModuleType('dynamic_mod') msg = ( - f"{dynamic_mod} was not imported correctly, have you used an " - f"`import` statement to access it?" + fr"{dynamic_mod} was not imported correctly, have you used an " + fr"`import` statement to access it?" ) with pytest.raises(ValueError, match=msg): register_pickle_by_value(dynamic_mod) From 5a2b25beb56545218349cc8e92521c0b4e17fef7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 21:01:20 +0100 Subject: [PATCH 56/70] TST (try to) escape backslashes on windows (2) --- tests/cloudpickle_test.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 942551e85..23b926bec 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -13,6 +13,7 @@ import pickletools import platform import random +import re import shutil import subprocess import sys @@ -2653,27 +2654,27 @@ def test_register_pickle_by_value(): unregister_pickle_by_value(mod) assert list_registry_pickle_by_value() == {pkg.__name__} - msg = fr"Input should be a module object, got {str} instead" + msg = f"Input should be a module object, got {str} instead" with pytest.raises(ValueError, match=msg): unregister_pickle_by_value(pkg.__name__) unregister_pickle_by_value(pkg) assert list_registry_pickle_by_value() == set() - msg = fr"{pkg} is not registered for pickle by value" - with pytest.raises(ValueError, match=msg): + msg = f"{pkg} is not registered for pickle by value" + with pytest.raises(ValueError, match=re.escape(msg)): unregister_pickle_by_value(pkg) - msg = fr"Input should be a module object, got {str} instead" + msg = f"Input should be a module object, got {str} instead" with pytest.raises(ValueError, match=msg): register_pickle_by_value(pkg.__name__) dynamic_mod = types.ModuleType('dynamic_mod') msg = ( - fr"{dynamic_mod} was not imported correctly, have you used an " - fr"`import` statement to access it?" + f"{dynamic_mod} was not imported correctly, have you used an " + f"`import` statement to access it?" ) - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=re.escape(msg)): register_pickle_by_value(dynamic_mod) From 2b82f4d086a0a02f8d41702652913844a7f62613 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 22:58:50 +0100 Subject: [PATCH 57/70] CLN update README after API change --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 0f3524e86..cd35b04c4 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,6 @@ is useful when the said module is unavailable in the unpickling environment >>> import cloudpickle >>> import my_module >>> cloudpickle.register_pickle_by_value(my_module) ->>> # cloudpickle.register_pickle_by_value("my_module") also works >>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by value >>> cloudpickle.unregister_pickle_by_value(my_module) >>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by reference From 058c5373580e931e57a7f6bff90087b169e367f3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 22:59:55 +0100 Subject: [PATCH 58/70] TST fix typo in test --- tests/cloudpickle_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 23b926bec..c6f3e7921 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2511,13 +2511,13 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_value( # noqa self ): - for package_or_module in ["package", "modue"]: + for package_or_module in ["package", "module"]: if package_or_module == "package": - m = __import__("_cloudpickle_testpkg") + import _cloudpickle_testpkg as m f = m.package_function_with_global _original_global = m.global_variable elif package_or_module == "module": - m = __import__("_cloudpickle_testpkg.mod") + import _cloudpickle_testpkg.mod as m f = m.module_function_with_global _original_global = m.global_variable try: From 850be6caa2f9410ac84f1eb8c53cb55b6cc6c67b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 23:02:50 +0100 Subject: [PATCH 59/70] CLN remove stale file --- tests/external_two.py | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 tests/external_two.py diff --git a/tests/external_two.py b/tests/external_two.py deleted file mode 100644 index 064585be9..000000000 --- a/tests/external_two.py +++ /dev/null @@ -1,2 +0,0 @@ -def function_from_external_two(): - return "" From 749a6b720bea4592a322fcda532d8e210cc2225a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 23:06:09 +0100 Subject: [PATCH 60/70] CLN clean up some un-necessary branches --- cloudpickle/cloudpickle.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 27b2c08b0..b58273bed 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -227,12 +227,11 @@ def _whichmodule(obj, name): def _should_pickle_by_reference(obj, name=None): """Dispatcher utility to test whether an object should be serialised by value or reference by using importability as a proxy.""" - if (isinstance(obj, types.FunctionType) or issubclass(type(obj), type) or - isinstance(obj, typing.TypeVar)): - module_name = _lookup_module_and_qualname(obj, name=name) - if module_name is None: + if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): + module_and_name = _lookup_module_and_qualname(obj, name=name) + if module_and_name is None: return False - module, name = module_name + module, name = module_and_name return not _is_registered_pickle_by_value(module.__name__) elif isinstance(obj, types.ModuleType): From c5bc41e5a03bf19159718f0374cc73f9a1891177 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 22 Jul 2021 23:12:03 +0100 Subject: [PATCH 61/70] postpone relative imports handling to the future --- cloudpickle/cloudpickle.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b58273bed..de288ef2f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -134,12 +134,16 @@ def register_pickle_by_value(module): raise ValueError( f"Input should be a module object, got {type(module)} instead" ) - # cloudpickle needs a way to access any module registered for pickling by - # value when introspecting relative imports inside functions pickled by - # value. Access is ensured by checking that module is present in + # In the future, cloudpickle may need a way to access any module registered + # for pickling by value in order to introspect relative imports inside + # functions pickled by value. (see + # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). + # This access can be ensured by checking that module is present in # sys.modules at registering time and assuming that it will still be in # there when accessed during pickling. Another alternative would be to - # store a weakref to the module. + # store a weakref to the module. Even though cloudpickle does not implement + # this introspection yet, in order to avoid a possible breaking change + # later, we still enforce the presence of module inside sys.modules. if module.__name__ not in sys.modules: raise ValueError( f"{module} was not imported correctly, have you used an " From 6ef5ec84fbfd57d474e9011d9e70c6ecfc44d11e Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 16:54:10 +0100 Subject: [PATCH 62/70] _is_registered_pickle_by_value should take a module --- cloudpickle/cloudpickle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index de288ef2f..52970ec91 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -169,7 +169,7 @@ def list_registry_pickle_by_value(): def _is_registered_pickle_by_value(module): - module_name = module.__name__ if inspect.ismodule(module) else module + module_name = module.__name__ if module_name in _PICKLE_BY_VALUE_MODULES: return True while True: @@ -236,7 +236,7 @@ def _should_pickle_by_reference(obj, name=None): if module_and_name is None: return False module, name = module_and_name - return not _is_registered_pickle_by_value(module.__name__) + return not _is_registered_pickle_by_value(module) elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for @@ -244,7 +244,7 @@ def _should_pickle_by_reference(obj, name=None): # is sys.modules therefore a cheap and simple heuristic to tell us # whether we can assume that a given module could be imported by name # in another Python process. - if _is_registered_pickle_by_value(obj.__name__): + if _is_registered_pickle_by_value(obj): return False return obj.__name__ in sys.modules else: @@ -909,7 +909,7 @@ def _typevar_reduce(obj): if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) - elif _is_registered_pickle_by_value(module_and_name[0].__name__): + elif _is_registered_pickle_by_value(module_and_name[0]): return (_make_typevar, _decompose_typevar(obj)) return (getattr, module_and_name) From 8004bae7237bc95d7c2933e8fc909b03210a6eb6 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:02:26 +0100 Subject: [PATCH 63/70] CLN cleaner registration API error messages --- cloudpickle/cloudpickle.py | 4 ++-- tests/cloudpickle_test.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 52970ec91..ad0672e07 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -132,7 +132,7 @@ def register_pickle_by_value(module): """Register that the input module should be pickled by value.""" if not isinstance(module, types.ModuleType): raise ValueError( - f"Input should be a module object, got {type(module)} instead" + f"Input should be a module object, got {str(module)} instead" ) # In the future, cloudpickle may need a way to access any module registered # for pickling by value in order to introspect relative imports inside @@ -156,7 +156,7 @@ def unregister_pickle_by_value(module): """Unregister that the input module should be pickled by value.""" if not isinstance(module, types.ModuleType): raise ValueError( - f"Input should be a module object, got {type(module)} instead" + f"Input should be a module object, got {str(module)} instead" ) if module.__name__ not in _PICKLE_BY_VALUE_MODULES: raise ValueError(f"{module} is not registered for pickle by value") diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c6f3e7921..e619bdc64 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2654,7 +2654,7 @@ def test_register_pickle_by_value(): unregister_pickle_by_value(mod) assert list_registry_pickle_by_value() == {pkg.__name__} - msg = f"Input should be a module object, got {str} instead" + msg = f"Input should be a module object, got {pkg.__name__} instead" with pytest.raises(ValueError, match=msg): unregister_pickle_by_value(pkg.__name__) @@ -2665,7 +2665,7 @@ def test_register_pickle_by_value(): with pytest.raises(ValueError, match=re.escape(msg)): unregister_pickle_by_value(pkg) - msg = f"Input should be a module object, got {str} instead" + msg = f"Input should be a module object, got {pkg.__name__} instead" with pytest.raises(ValueError, match=msg): register_pickle_by_value(pkg.__name__) From 418b848bfc605e0f2a104ba70bd38551c3de6253 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:04:30 +0100 Subject: [PATCH 64/70] CLN unused import --- cloudpickle/cloudpickle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index ad0672e07..daae48839 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -45,7 +45,6 @@ import builtins import dis import opcode -import inspect import platform import sys import types From 79a38fe1cd9be4b2515aac88cbaccc412690f5e3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:10:24 +0100 Subject: [PATCH 65/70] Apply docs suggestions Co-authored-by: Olivier Grisel --- cloudpickle/cloudpickle.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index daae48839..95b3ef48c 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -128,7 +128,24 @@ def _lookup_class_or_track(class_tracker_id, class_def): def register_pickle_by_value(module): - """Register that the input module should be pickled by value.""" + """Register a module to make it functions and classes picklable by value. + + By default, functions and classes that are attributes of an importable + module are to be pickled by reference, that is relying on re-importing + the attribute from the module at load time. + + If `register_pickle_by_value(module)` is called, all its functions and + classes are subsequently to be pickled by value, meaning that they can + be loaded in Python processes where the module is not importable. + + This is especially useful when developing a module in a distributed + execution environment: restarting the client Python process with the new + source code is enough: there is no need to re-install the new version + of the module on all the worker nodes nor to restart the workers. + + Note: this feature is considered experimental. See the cloudpickle + README.md file for more details and limitations. + """ if not isinstance(module, types.ModuleType): raise ValueError( f"Input should be a module object, got {str(module)} instead" @@ -228,8 +245,19 @@ def _whichmodule(obj, name): def _should_pickle_by_reference(obj, name=None): - """Dispatcher utility to test whether an object should be serialised by - value or reference by using importability as a proxy.""" + """Test whether an function or a class should be pickled by reference + + Pickling by reference means by that the object (typically a function + or a class) is an attribute of a module that is assumed to be importable + in the target Python environment. Loading will therefore rely on importing + the module and then calling `getattr` on it to access the function or class. + + Pickling by reference is the only option to pickle functions and classes in the + standard library. In cloudpickle the alternative option is to pickle by value (for + instance for interactively or locally defined functions and classes or for + attributes of modules that have been explicitly registered to be pickled by + value. + """ if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): module_and_name = _lookup_module_and_qualname(obj, name=name) if module_and_name is None: @@ -241,7 +269,7 @@ def _should_pickle_by_reference(obj, name=None): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added in # is sys.modules therefore a cheap and simple heuristic to tell us - # whether we can assume that a given module could be imported by name + # whether we can assume that a given module could be imported by name # in another Python process. if _is_registered_pickle_by_value(obj): return False From 4f9af388eaae74d64517225a580a853f75f9bdac Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:10:44 +0100 Subject: [PATCH 66/70] Update tests/cloudpickle_test.py Co-authored-by: Olivier Grisel --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index e619bdc64..5796474b8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2540,7 +2540,7 @@ def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_va def test_pickle_various_versions_of_the_same_function_with_different_pickling_method( # noqa self ): - # Make sure that different version of the same function (possibly + # Make sure that different versions of the same function (possibly # pickled in a different way - by value and/or by reference) can # peacefully co-exist (e.g. without globals interaction) in a remote # worker. From 5f079b0e29b34bd346759db2a73bcc9dcc7a3277 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:14:17 +0100 Subject: [PATCH 67/70] fix linting errors --- cloudpickle/cloudpickle.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 95b3ef48c..1d1b56be2 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -246,17 +246,17 @@ def _whichmodule(obj, name): def _should_pickle_by_reference(obj, name=None): """Test whether an function or a class should be pickled by reference - - Pickling by reference means by that the object (typically a function - or a class) is an attribute of a module that is assumed to be importable - in the target Python environment. Loading will therefore rely on importing - the module and then calling `getattr` on it to access the function or class. - - Pickling by reference is the only option to pickle functions and classes in the - standard library. In cloudpickle the alternative option is to pickle by value (for - instance for interactively or locally defined functions and classes or for - attributes of modules that have been explicitly registered to be pickled by - value. + + Pickling by reference means by that the object (typically a function or a + class) is an attribute of a module that is assumed to be importable in the + target Python environment. Loading will therefore rely on importing the + module and then calling `getattr` on it to access the function or class. + + Pickling by reference is the only option to pickle functions and classes + in the standard library. In cloudpickle the alternative option is to + pickle by value (for instance for interactively or locally defined + functions and classes or for attributes of modules that have been + explicitly registered to be pickled by value. """ if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): module_and_name = _lookup_module_and_qualname(obj, name=name) From 0589dcba6456f72e92264556c195a0c96befe2f3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 17:15:26 +0100 Subject: [PATCH 68/70] more linting... --- cloudpickle/cloudpickle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 1d1b56be2..347b38695 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -129,20 +129,20 @@ def _lookup_class_or_track(class_tracker_id, class_def): def register_pickle_by_value(module): """Register a module to make it functions and classes picklable by value. - + By default, functions and classes that are attributes of an importable module are to be pickled by reference, that is relying on re-importing the attribute from the module at load time. - + If `register_pickle_by_value(module)` is called, all its functions and classes are subsequently to be pickled by value, meaning that they can be loaded in Python processes where the module is not importable. - + This is especially useful when developing a module in a distributed execution environment: restarting the client Python process with the new source code is enough: there is no need to re-install the new version of the module on all the worker nodes nor to restart the workers. - + Note: this feature is considered experimental. See the cloudpickle README.md file for more details and limitations. """ From e4ca37dae797ae52fca142f44c1b674b2e103bef Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 18:18:06 +0100 Subject: [PATCH 69/70] TST fix a few test mistakes --- tests/cloudpickle_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5796474b8..d2acfb711 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2422,12 +2422,13 @@ def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # typevar assert w.run(lambda: LocalT.__name__) == LocalT.__name__ # classes - assert w.run( - lambda: LocalClass().method() == LocalClass().method() + assert ( + w.run(lambda: LocalClass().method()) + == LocalClass().method() ) # modules assert ( - w.run(lambda: mod).local_function() == local_function() + w.run(lambda: mod.local_function()) == local_function() ) # Constructs from modules inside subfolders should be pickled @@ -2530,6 +2531,7 @@ def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_va register_pickle_by_value(m) assert w.run(lambda: f()) == _original_global m.global_variable = "modified global" + assert m.global_variable != _original_global assert w.run(lambda: f()) == "modified global" unregister_pickle_by_value(m) finally: From 1e9a48dc7ccf3885d4ae2be03069a3c67b611583 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 31 Jul 2021 18:18:22 +0100 Subject: [PATCH 70/70] DOC more in-depth context description in README --- README.md | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index cd35b04c4..731fe0abc 100644 --- a/README.md +++ b/README.md @@ -72,19 +72,24 @@ Overriding pickle's serialization mechanism for importable constructs: An important difference between `cloudpickle` and `pickle` is that `cloudpickle` can serialize a function or class **by value**, whereas `pickle` -can only serialize it **by reference**, e.g. by serializing its *module -attribute path* (such as `my_module.my_function`). - -By default, `cloudpickle` only uses serialization by value in cases where -serialization by reference is usually ineffective, for example when the -function/class to be pickled was constructed in an interactive Python session. - -Since `cloudpickle 1.7.0`, it is possible to extend the use of serialization by -value to functions or classes coming from **any pure Python module**. This feature -is useful when the said module is unavailable in the unpickling environment -(making traditional serialization by reference ineffective). To this end, -`cloudpickle` exposes the -`register_pickle_by_value`/`unregister_pickle_by_value` functions: +can only serialize it **by reference**. Serialization by reference treats +functions and classes as attributes of modules, and pickles them through +instructions that trigger the import of their module at load time. +Serialization by reference is thus limited in that it assumes that the module +containing the function or class is available/importable in the unpickling +environment. This assumption breaks when pickling constructs defined in an +interactive session, a case that is automatically detected by `cloudpickle`, +that pickles such constructs **by value**. + +Another case where the importability assumption is expected to break is when +developing a module in a distributed execution environment: the worker +processes may not have access to the said module, for example if they live on a +different machine than the process in which the module is being developed. +By itself, `cloudpickle` cannot detect such "locally importable" modules and +switch to serialization by value; instead, it relies on its default mode, +which is serialization by reference. However, since `cloudpickle 1.7.0`, one +can explicitly specify modules for which serialization by value should be used, +using the `register_pickle_by_value(module)`/`/unregister_pickle(module)` API: ```python >>> import cloudpickle @@ -95,6 +100,10 @@ is useful when the said module is unavailable in the unpickling environment >>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by reference ``` +Using this API, there is no need to re-install the new version of the module on +all the worker nodes nor to restart the workers: restarting the client Python +process with the new source code is enough. + Note that this feature is still **experimental**, and may fail in the following situations: