From d3c1f46686a855a247f93b53493160257f2fbac6 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 15:35:56 +0100 Subject: [PATCH 01/11] New tests for interactively defined functions --- tests/cloudpickle_test.py | 52 ++++++++++++++++++++++++++++++++++++--- tests/testutils.py | 49 ++++++++++++++++++++++++++++++++---- 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index efe0ebff0..5497cf1dd 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -44,6 +44,7 @@ from cloudpickle.cloudpickle import _find_module, _make_empty_cell, cell_set from .testutils import subprocess_pickle_echo +from .testutils import assert_run_python_script HAVE_WEAKSET = hasattr(weakref, 'WeakSet') @@ -287,19 +288,21 @@ def some_method(self, x): clone_class = pickle_depickle(SomeClass, protocol=self.protocol) self.assertEqual(clone_class(1).one(), 1) self.assertEqual(clone_class(5).some_method(41), 7) - clone_class = subprocess_pickle_echo(SomeClass) + clone_class = subprocess_pickle_echo(SomeClass, protocol=self.protocol) self.assertEqual(clone_class(5).some_method(41), 7) # pickle the class instances self.assertEqual(pickle_depickle(SomeClass(1)).one(), 1) self.assertEqual(pickle_depickle(SomeClass(5)).some_method(41), 7) - new_instance = subprocess_pickle_echo(SomeClass(5)) + new_instance = subprocess_pickle_echo(SomeClass(5), + protocol=self.protocol) self.assertEqual(new_instance.some_method(41), 7) # pickle the method instances self.assertEqual(pickle_depickle(SomeClass(1).one)(), 1) self.assertEqual(pickle_depickle(SomeClass(5).some_method)(41), 7) - new_method = subprocess_pickle_echo(SomeClass(5).some_method) + new_method = subprocess_pickle_echo(SomeClass(5).some_method, + protocol=self.protocol) self.assertEqual(new_method(41), 7) def test_partial(self): @@ -748,6 +751,49 @@ def test_builtin_type__new__(self): for t in list, tuple, set, frozenset, dict, object: self.assertTrue(pickle_depickle(t.__new__) is t.__new__) + def test_interactively_defined_function(self): + # Check that callables defined in the __main__ module of a Python + # script (or jupyter kernel) can be pickled / unpickled / executed. + code = """\ +from testutils import subprocess_pickle_echo + +CONSTANT = 42 + +class Foo(object): + + def method(self, x): + return x + + +def f1(): + return Foo + + +def f2(x): + return Foo().method(x) + + +def f3(): + return Foo().method(CONSTANT) + + +cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) +assert cloned(3) == 9 + +cloned = subprocess_pickle_echo(Foo, protocol={protocol}) +assert cloned().method(2) == Foo().method(2) + +cloned = subprocess_pickle_echo(f1, protocol={protocol}) +assert cloned()().method('a') == f1()().method('a') + +cloned = subprocess_pickle_echo(f2, protocol={protocol}) +assert cloned(2) == f2(2) + +cloned = subprocess_pickle_echo(f3, protocol={protocol}) +assert cloned() == f3() + """.format(protocol=self.protocol) + assert_run_python_script(code) + @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self): diff --git a/tests/testutils.py b/tests/testutils.py index 110e2f78d..2eca9782e 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -1,7 +1,11 @@ import sys import os +import tempfile from subprocess import Popen +from subprocess import check_output from subprocess import PIPE +from subprocess import STDOUT +from subprocess import CalledProcessError from cloudpickle import dumps from pickle import loads @@ -16,7 +20,7 @@ class TimeoutExpired(Exception): timeout_supported = False -def subprocess_pickle_echo(input_data): +def subprocess_pickle_echo(input_data, protocol=None): """Echo function with a child Python process Pickle the input data into a buffer, send it to a subprocess via @@ -27,8 +31,8 @@ def subprocess_pickle_echo(input_data): [1, 'a', None] """ - pickled_input_data = dumps(input_data) - cmd = [sys.executable, __file__] + pickled_input_data = dumps(input_data, protocol=protocol) + cmd = [sys.executable, __file__] # run then pickle_echo() in __main__ cwd = os.getcwd() proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd) try: @@ -48,7 +52,7 @@ def subprocess_pickle_echo(input_data): raise RuntimeError(message) -def pickle_echo(stream_in=None, stream_out=None): +def pickle_echo(stream_in=None, stream_out=None, protocol=None): """Read a pickle from stdin and pickle it back to stdout""" if stream_in is None: stream_in = sys.stdin @@ -64,9 +68,44 @@ def pickle_echo(stream_in=None, stream_out=None): input_bytes = stream_in.read() stream_in.close() unpickled_content = loads(input_bytes) - stream_out.write(dumps(unpickled_content)) + stream_out.write(dumps(unpickled_content, protocol=protocol)) stream_out.close() +def assert_run_python_script(source_code, timeout=5): + """Utility to help check pickleability of objects defined in __main__ + + The script provided in the source code should return 0 and not print + anything on stderr or stdout. + """ + fd, source_file = tempfile.mkstemp(suffix='_src_test_cloudpickle.py') + try: + with open(fd, 'wb') as f: + f.write(source_code.encode('utf-8')) + + cmd = [sys.executable, source_file] + pythonpath = "{cwd}/tests:{cwd}".format(cwd=os.getcwd()) + kwargs = { + 'cwd': os.getcwd(), + 'stderr': STDOUT, + 'env': {'PYTHONPATH': pythonpath}, + } + if timeout_supported: + kwargs['timeout'] = timeout + try: + try: + out = check_output(cmd, **kwargs) + except CalledProcessError as e: + raise RuntimeError(u"script errored with output:\n%s" + % e.output.decode('utf-8')) + if out != b"": + raise AssertionError(out.decode('utf-8')) + except TimeoutExpired as e: + raise RuntimeError(u"script timeout, output so far:\n%s" + % e.output.decode('utf-8')) + finally: + os.unlink(source_file) + + if __name__ == '__main__': pickle_echo() From 8d7c003394fd64cd8be1a2bee4af872090e368c4 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 19:09:39 +0100 Subject: [PATCH 02/11] More robust non-regression test --- tests/testutils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/testutils.py b/tests/testutils.py index 2eca9782e..6d7bd704a 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -1,11 +1,8 @@ import sys import os +import os.path as op import tempfile -from subprocess import Popen -from subprocess import check_output -from subprocess import PIPE -from subprocess import STDOUT -from subprocess import CalledProcessError +from subprocess import Popen, check_output, PIPE, STDOUT, CalledProcessError from cloudpickle import dumps from pickle import loads @@ -79,14 +76,17 @@ def assert_run_python_script(source_code, timeout=5): anything on stderr or stdout. """ fd, source_file = tempfile.mkstemp(suffix='_src_test_cloudpickle.py') + os.close(fd) try: - with open(fd, 'wb') as f: + with open(source_file, 'wb') as f: f.write(source_code.encode('utf-8')) + cloudpickle_repo_folder = op.normpath( + op.join(op.dirname(__file__), '..')) cmd = [sys.executable, source_file] - pythonpath = "{cwd}/tests:{cwd}".format(cwd=os.getcwd()) + pythonpath = "{src}/tests:{src}".format(src=cloudpickle_repo_folder) kwargs = { - 'cwd': os.getcwd(), + 'cwd': cloudpickle_repo_folder, 'stderr': STDOUT, 'env': {'PYTHONPATH': pythonpath}, } From 5d2ae3ee7644dcbe7eac0c1cfefafadcecf147ef Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 20:57:42 +0100 Subject: [PATCH 03/11] Use __file__ as reference instead of os.getcwd() --- tests/testutils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/testutils.py b/tests/testutils.py index 6d7bd704a..e98113271 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -30,8 +30,13 @@ def subprocess_pickle_echo(input_data, protocol=None): """ pickled_input_data = dumps(input_data, protocol=protocol) cmd = [sys.executable, __file__] # run then pickle_echo() in __main__ - cwd = os.getcwd() - proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd) + # cwd = os.getcwd() + cloudpickle_repo_folder = op.normpath( + op.join(op.dirname(__file__), '..')) + cwd = cloudpickle_repo_folder + pythonpath = "{src}/tests:{src}".format(src=cloudpickle_repo_folder) + env = {'PYTHONPATH': pythonpath} + proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env) try: comm_kwargs = {} if timeout_supported: @@ -80,10 +85,9 @@ def assert_run_python_script(source_code, timeout=5): try: with open(source_file, 'wb') as f: f.write(source_code.encode('utf-8')) + cmd = [sys.executable, source_file] cloudpickle_repo_folder = op.normpath( op.join(op.dirname(__file__), '..')) - - cmd = [sys.executable, source_file] pythonpath = "{src}/tests:{src}".format(src=cloudpickle_repo_folder) kwargs = { 'cwd': cloudpickle_repo_folder, From 9443907dc72d5a9402e8b77922c7bfce19f2d0be Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 21:56:11 +0100 Subject: [PATCH 04/11] Nicer indentation handling for inline code snippet --- tests/cloudpickle_test.py | 43 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5497cf1dd..f7bd53e60 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -755,43 +755,44 @@ def test_interactively_defined_function(self): # Check that callables defined in the __main__ module of a Python # script (or jupyter kernel) can be pickled / unpickled / executed. code = """\ -from testutils import subprocess_pickle_echo + from testutils import subprocess_pickle_echo -CONSTANT = 42 + CONSTANT = 42 -class Foo(object): + class Foo(object): - def method(self, x): - return x + def method(self, x): + return x -def f1(): - return Foo + def f1(): + return Foo -def f2(x): - return Foo().method(x) + def f2(x): + return Foo().method(x) -def f3(): - return Foo().method(CONSTANT) + def f3(): + return Foo().method(CONSTANT) -cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) -assert cloned(3) == 9 + cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) + assert cloned(3) == 9 -cloned = subprocess_pickle_echo(Foo, protocol={protocol}) -assert cloned().method(2) == Foo().method(2) + cloned = subprocess_pickle_echo(Foo, protocol={protocol}) + assert cloned().method(2) == Foo().method(2) -cloned = subprocess_pickle_echo(f1, protocol={protocol}) -assert cloned()().method('a') == f1()().method('a') + cloned = subprocess_pickle_echo(f1, protocol={protocol}) + assert cloned()().method('a') == f1()().method('a') -cloned = subprocess_pickle_echo(f2, protocol={protocol}) -assert cloned(2) == f2(2) + cloned = subprocess_pickle_echo(f2, protocol={protocol}) + assert cloned(2) == f2(2) -cloned = subprocess_pickle_echo(f3, protocol={protocol}) -assert cloned() == f3() + cloned = subprocess_pickle_echo(f3, protocol={protocol}) + assert cloned() == f3() """.format(protocol=self.protocol) + code = "\n".join(line[8:] for line in code.splitlines()) assert_run_python_script(code) @pytest.mark.skipif(sys.version_info >= (3, 0), From b7014ded004a92960981791083f576bb67244397 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 22:05:32 +0100 Subject: [PATCH 05/11] cleanup --- tests/testutils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testutils.py b/tests/testutils.py index e98113271..a8187baf3 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -30,7 +30,6 @@ def subprocess_pickle_echo(input_data, protocol=None): """ pickled_input_data = dumps(input_data, protocol=protocol) cmd = [sys.executable, __file__] # run then pickle_echo() in __main__ - # cwd = os.getcwd() cloudpickle_repo_folder = op.normpath( op.join(op.dirname(__file__), '..')) cwd = cloudpickle_repo_folder From 99bfb9536ed25fa7786f75f45233d9bbbd4e0c45 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 23:10:11 +0100 Subject: [PATCH 06/11] More comprehensive interactive function tests --- tests/cloudpickle_test.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f7bd53e60..dd12ab1c5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -764,25 +764,30 @@ class Foo(object): def method(self, x): return x + def f0(x): + return x ** 2 def f1(): return Foo - def f2(x): return Foo().method(x) - def f3(): return Foo().method(CONSTANT) - cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) assert cloned(3) == 9 + cloned = subprocess_pickle_echo(f0, protocol={protocol}) + assert cloned(3) == 9 + cloned = subprocess_pickle_echo(Foo, protocol={protocol}) assert cloned().method(2) == Foo().method(2) + cloned = subprocess_pickle_echo(Foo(), protocol={protocol}) + assert cloned.method(2) == Foo().method(2) + cloned = subprocess_pickle_echo(f1, protocol={protocol}) assert cloned()().method('a') == f1()().method('a') @@ -792,8 +797,7 @@ def f3(): cloned = subprocess_pickle_echo(f3, protocol={protocol}) assert cloned() == f3() """.format(protocol=self.protocol) - code = "\n".join(line[8:] for line in code.splitlines()) - assert_run_python_script(code) + assert_run_python_script(textwrap.dedent(code)) @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") From acf5edd4e72a50c78e14ace9646913ef455757c2 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 13 Nov 2017 23:11:00 +0100 Subject: [PATCH 07/11] Fix interactive function pickling --- cloudpickle/cloudpickle.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 598834ee6..d9e61cb73 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -629,11 +629,15 @@ def save_global(self, obj, name=None, pack=struct.pack): dispatched here. """ try: - return Pickler.save_global(self, obj, name=name) + if obj.__module__ == "__main__": + raise ValueError("%r is defined in __main__" % obj) + else: + return Pickler.save_global(self, obj, name=name) except Exception: if obj.__module__ == "__builtin__" or obj.__module__ == "builtins": if obj in _BUILTIN_TYPE_NAMES: - return self.save_reduce(_builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) typ = type(obj) if typ is not obj and isinstance(obj, (type, types.ClassType)): From 7da07a31188d36fa4eda90c098982beb1874cfb1 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 14 Nov 2017 08:38:26 +0100 Subject: [PATCH 08/11] Simpler fix --- cloudpickle/cloudpickle.py | 8 ++++---- tests/cloudpickle_test.py | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d9e61cb73..a185a4e4b 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -628,11 +628,11 @@ def save_global(self, obj, name=None, pack=struct.pack): The name of this method is somewhat misleading: all types get dispatched here. """ + if obj.__module__ == "__main__": + return self.save_dynamic_class(obj) + try: - if obj.__module__ == "__main__": - raise ValueError("%r is defined in __main__" % obj) - else: - return Pickler.save_global(self, obj, name=name) + return Pickler.save_global(self, obj, name=name) except Exception: if obj.__module__ == "__builtin__" or obj.__module__ == "builtins": if obj in _BUILTIN_TYPE_NAMES: diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index dd12ab1c5..af208df66 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -764,6 +764,8 @@ class Foo(object): def method(self, x): return x + foo = Foo() + def f0(x): return x ** 2 @@ -776,6 +778,9 @@ def f2(x): def f3(): return Foo().method(CONSTANT) + def f4(x): + return foo.method(x) + cloned = subprocess_pickle_echo(lambda x: x**2, protocol={protocol}) assert cloned(3) == 9 @@ -796,6 +801,9 @@ def f3(): cloned = subprocess_pickle_echo(f3, protocol={protocol}) assert cloned() == f3() + + cloned = subprocess_pickle_echo(f4, protocol={protocol}) + assert cloned(2) == f4(2) """.format(protocol=self.protocol) assert_run_python_script(textwrap.dedent(code)) From d9e02fbdfcd52da1e1020f73e8e1a10b82b60f04 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 14 Nov 2017 19:32:13 +0100 Subject: [PATCH 09/11] Further simplification of save_global --- cloudpickle/cloudpickle.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a185a4e4b..e80932909 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -639,11 +639,7 @@ def save_global(self, obj, name=None, pack=struct.pack): return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) - typ = type(obj) - if typ is not obj and isinstance(obj, (type, types.ClassType)): - return self.save_dynamic_class(obj) - - raise + return self.save_dynamic_class(obj) dispatch[type] = save_global dispatch[types.ClassType] = save_global From 20feec27a1ad35d68e284c4f38cbf64290d03071 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 15 Nov 2017 16:59:27 +0100 Subject: [PATCH 10/11] Add entry to CHANGES.md --- CHANGES.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 5d4a5a096..dfd571686 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,11 @@ +0.5.2 +===== + +- Fixed a regression: `AttributeError` when loading pickles that hold a + reference to a dynamically defined class from the `__main__` module. + ([issue #131]( https://github.com/cloudpipe/cloudpickle/issues/131)). + + 0.5.1 ===== From 748e7d224aee032b5105ff29dc2ea1189bbaf3a3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 15 Nov 2017 17:37:42 +0100 Subject: [PATCH 11/11] Revert "Further simplification of save_global" This reverts commit d9e02fbdfcd52da1e1020f73e8e1a10b82b60f04. --- cloudpickle/cloudpickle.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e80932909..a185a4e4b 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -639,7 +639,11 @@ def save_global(self, obj, name=None, pack=struct.pack): return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) - return self.save_dynamic_class(obj) + typ = type(obj) + if typ is not obj and isinstance(obj, (type, types.ClassType)): + return self.save_dynamic_class(obj) + + raise dispatch[type] = save_global dispatch[types.ClassType] = save_global