From f56958c67c14f171a1248f2b4af6febbb97b902c Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 23 Aug 2018 17:25:13 +0200 Subject: [PATCH 01/18] WIP try to generalize func base globals handling --- cloudpickle/cloudpickle.py | 11 ++++++----- tests/cloudpickle_test.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b1107ba92..f3c0590de 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -635,11 +635,12 @@ def extract_func_data(self, func): base_globals = self.globals_ref.get(id(func.__globals__), None) if base_globals is None: - # For functions defined in __main__, use vars(__main__) for - # base_global. This is necessary to share the global variables - # across multiple functions in this module. - if func.__module__ == "__main__": - base_globals = "__main__" + # For functions defined in a well behaved module use + # vars(func.__module__) for base_globals. This is necessary to + # share the global variables across multiple pickled functions from + # this module. + if hasattr(func, '__module__') and func.__module__ is not None: + base_globals = func.__module__ else: base_globals = {} self.globals_ref[id(func.__globals__)] = base_globals diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f1c3ea55b..534993e2c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -47,6 +47,9 @@ from .testutils import assert_run_python_script +_TEST_GLOBAL_VARIABLE = "default_value" + + class RaiserOnPickle(object): def __init__(self, exc): @@ -887,6 +890,39 @@ def f1(): clone_func=clone_func) assert_run_python_script(textwrap.dedent(code)) + def test_closure_interacting_with_a_global_variable(self): + global _TEST_GLOBAL_VARIABLE + orig_value = _TEST_GLOBAL_VARIABLE + try: + def f0(): + global _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE = "changed_by_f0" + + def f1(): + return _TEST_GLOBAL_VARIABLE + + cloned_f0 = cloudpickle.loads(cloudpickle.dumps( + f0, protocol=self.protocol)) + cloned_f1 = cloudpickle.loads(cloudpickle.dumps( + f1, protocol=self.protocol)) + pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) + + # Change the value of the global variable + cloned_f0() + + # Ensure that the global variable is the same for another function + result_f1 = cloned_f1() + assert result_f1 == "changed_by_f0", result_f1 + assert f1() == result_f1 + + # Ensure that unpickling the global variable does not change its + # value + result_pickled_f1 = cloudpickle.loads(pickled_f1)() + assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + finally: + _TEST_GLOBAL_VARIABLE = orig_value + + @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self): From d6a040d57242ea289cb94b9254d497d769ed65a7 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 6 Sep 2018 15:52:17 +0200 Subject: [PATCH 02/18] FIX added checking into sys.modules before trying to import base_globals during unpickling, in order to recreate the previous environnement an object lived in, the module under the base_globals name is re-imported. However, it is not always possible, especially if base_globals pointed to a temporary or locally defined module. This PR introduces a fix that checks if the module still exists during unpickling --- cloudpickle/cloudpickle.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f3c0590de..0aefbe141 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1091,7 +1091,13 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): - base_globals = vars(sys.modules[base_globals]) + if base_globals in sys.modules.keys(): + # check if we can import the previous environment the object lived + # in + base_globals = vars(sys.modules[base_globals]) + else: + base_globals = {} + base_globals['__builtins__'] = __builtins__ closure = ( From 3c8e53668a9178175047f3c5a342f9ec78e31807 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 6 Sep 2018 16:30:33 +0200 Subject: [PATCH 03/18] dummy update to trigger a travis build --- cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 0aefbe141..f557906bb 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1092,8 +1092,8 @@ def _make_skel_func(code, cell_count, base_globals=None): base_globals = {} elif isinstance(base_globals, str): if base_globals in sys.modules.keys(): - # check if we can import the previous environment the object lived - # in + # this checks if we can import the previous environment the object + # lived in base_globals = vars(sys.modules[base_globals]) else: base_globals = {} From 8626c8d3af8cb96c35840222cea060c440cd589c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 7 Sep 2018 16:48:07 +0200 Subject: [PATCH 04/18] added base_globals caching and fixed python 2 crash two main fixes here: * added a _BASE_GLOBALS_CACHE, that will track a functions globals in the environnement where it is loaded * fixed a python 2 issue due to dynamic_subimport --- cloudpickle/cloudpickle.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f557906bb..6aed29dd2 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -78,6 +78,9 @@ PY3 = True +# cache that tracks the values of global variables. +_BASE_GLOBALS_CACHE = {} + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -935,7 +938,10 @@ def subimport(name): def dynamic_subimport(name, vars): mod = imp.new_module(name) mod.__dict__.update(vars) - sys.modules[name] = mod + # adding a dynamic module to sys.module can cause recursive imports + # in python 2.x.x because site.Quitter contains sys as one of its globals + if sys.version > '3': + sys.modules[name] = mod return mod @@ -1095,8 +1101,11 @@ def _make_skel_func(code, cell_count, base_globals=None): # this checks if we can import the previous environment the object # lived in base_globals = vars(sys.modules[base_globals]) + elif id(base_globals) in _BASE_GLOBALS_CACHE.keys(): + base_globals = _BASE_GLOBALS_CACHE[id(base_globals)] else: base_globals = {} + _BASE_GLOBALS_CACHE[id(base_globals)] = base_globals base_globals['__builtins__'] = __builtins__ From afcf8d7cf5662182be461d13ed87e1405c7eb306 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 7 Sep 2018 17:18:58 +0200 Subject: [PATCH 05/18] pep8 --- cloudpickle/cloudpickle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 6aed29dd2..abe3700e9 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -81,6 +81,7 @@ # cache that tracks the values of global variables. _BASE_GLOBALS_CACHE = {} + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF From 2031c605ee43abc5dd337b211111bc03c6ffab14 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 11:46:47 +0200 Subject: [PATCH 06/18] added _BASE_GLOBALS_CACHE test, minor fixes --- cloudpickle/cloudpickle.py | 12 +++++------- tests/cloudpickle_test.py | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index abe3700e9..a5698b937 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -56,6 +56,7 @@ import traceback import types import weakref +import hashlib # cloudpickle is meant for inter process communication: we expect all @@ -939,10 +940,6 @@ def subimport(name): def dynamic_subimport(name, vars): mod = imp.new_module(name) mod.__dict__.update(vars) - # adding a dynamic module to sys.module can cause recursive imports - # in python 2.x.x because site.Quitter contains sys as one of its globals - if sys.version > '3': - sys.modules[name] = mod return mod @@ -1098,15 +1095,16 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): + base_globals_name = base_globals if base_globals in sys.modules.keys(): # this checks if we can import the previous environment the object # lived in base_globals = vars(sys.modules[base_globals]) - elif id(base_globals) in _BASE_GLOBALS_CACHE.keys(): - base_globals = _BASE_GLOBALS_CACHE[id(base_globals)] + elif base_globals in _BASE_GLOBALS_CACHE.keys(): + base_globals = _BASE_GLOBALS_CACHE[base_globals] else: base_globals = {} - _BASE_GLOBALS_CACHE[id(base_globals)] = base_globals + _BASE_GLOBALS_CACHE[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 534993e2c..0c7e2f4b6 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -909,11 +909,12 @@ def f1(): # Change the value of the global variable cloned_f0() + assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" # Ensure that the global variable is the same for another function - result_f1 = cloned_f1() - assert result_f1 == "changed_by_f0", result_f1 - assert f1() == result_f1 + result_cloned_f1 = cloned_f1() + assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 + assert f1() == result_cloned_f1 # Ensure that unpickling the global variable does not change its # value @@ -922,6 +923,32 @@ def f1(): finally: _TEST_GLOBAL_VARIABLE = orig_value + def test_function_from_dynamic_module_with_globals_modifications(self): + mod = imp.new_module('mod') + code = ''' + x = 1 + def func_that_relies_on_dynamic_module(): + global x + return x + ''' + exec(textwrap.dedent(code), mod.__dict__) + + # first, dump the module + fileobj = cloudpickle.dumps(mod.func_that_relies_on_dynamic_module) + + first_f = pickle.loads(fileobj) + # change the mod's global variable x + mod.x = 2 + + fileobj = cloudpickle.dumps(mod.func_that_relies_on_dynamic_module) + + mod.x = 1 + + # finally, re-load the dynamic module + new_f = pickle.loads(fileobj) + + assert first_f() == new_f() + @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") From ec045b3e8e6e19815b52059ecfcbd5d904b68b83 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 14:13:15 +0200 Subject: [PATCH 07/18] changed _BASE_GLOBALS_CACHE to _dynamic_modules --- cloudpickle/cloudpickle.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a5698b937..27725de9a 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,8 +79,8 @@ PY3 = True -# cache that tracks the values of global variables. -_BASE_GLOBALS_CACHE = {} +# caches dynamic modules that are not referenced in sys.modules +_dynamic_modules = {} def _make_cell_set_template_code(): @@ -1096,15 +1096,15 @@ def _make_skel_func(code, cell_count, base_globals=None): base_globals = {} elif isinstance(base_globals, str): base_globals_name = base_globals - if base_globals in sys.modules.keys(): + if base_globals_name in sys.modules.keys(): # this checks if we can import the previous environment the object # lived in - base_globals = vars(sys.modules[base_globals]) - elif base_globals in _BASE_GLOBALS_CACHE.keys(): - base_globals = _BASE_GLOBALS_CACHE[base_globals] + base_globals = vars(sys.modules[base_globals_name]) + elif base_globals_name in _dynamic_modules.keys(): + base_globals = _dynamic_modules[base_globals_name] else: base_globals = {} - _BASE_GLOBALS_CACHE[base_globals_name] = base_globals + _dynamic_modules[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ From 26bc7942e060e47eee2065b079d8bd59b37fbcab Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 15:30:43 +0200 Subject: [PATCH 08/18] removed hashlib from imports --- cloudpickle/cloudpickle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 27725de9a..e7b7290dd 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -56,7 +56,6 @@ import traceback import types import weakref -import hashlib # cloudpickle is meant for inter process communication: we expect all From 8d672a66c0a4eeb8510d525a8e981f0053eab90b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 18:27:42 +0200 Subject: [PATCH 09/18] dynamic modules test now ran in a separate process this modification was done to prevent side effects, and affectation of cloudpickle's global variable, (here, _dynamic_modules). Now, those global variables are only changed in ephemere child processes --- tests/cloudpickle_test.py | 83 +++++++++++++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 11 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0c7e2f4b6..742164d23 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -17,6 +17,7 @@ import textwrap import unittest import weakref +import os try: from StringIO import StringIO @@ -924,31 +925,91 @@ def f1(): _TEST_GLOBAL_VARIABLE = orig_value def test_function_from_dynamic_module_with_globals_modifications(self): + """ + this test verifies that: + - any modification in the global variables of a dynamic + module living in a child process won't get overridden + when new object are unpickled in the child's interpreter + + - vice versa, e.g a modification in the parent process does not + override the value of the variable in the child process + + The two cases are equivalent, and here, the second case is tested. + """ + + # first, we create a dynamic module in the parent process mod = imp.new_module('mod') code = ''' x = 1 - def func_that_relies_on_dynamic_module(): + def func_that_relies_on_dynamic_module(v=None): global x + if v is not None: + x = v return x ''' exec(textwrap.dedent(code), mod.__dict__) - # first, dump the module - fileobj = cloudpickle.dumps(mod.func_that_relies_on_dynamic_module) + try: + # simple sanity check on the function's output + assert mod.func_that_relies_on_dynamic_module() == 1 + + # the function of mod is pickled two times, with two different + # values for the global variable x. + + # a child process that sequentially unpickles the + # two functions is then launched - first_f = pickle.loads(fileobj) - # change the mod's global variable x - mod.x = 2 + # once the _first_ function gets unpickled, mod is created and + # tracked in the child environment. Whatever the global variable + # x's value in the second function, it will be overriden by the + # initial value of x in the child environment - fileobj = cloudpickle.dumps(mod.func_that_relies_on_dynamic_module) + with open('first_function.pk', 'wb') as fid: + cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) - mod.x = 1 + # change the mod's global variable x + mod.x = 2 + # at this point, mod.func_that_relies_on_dynamic_module() + # returns 2 + assert mod.func_that_relies_on_dynamic_module() == 2 + with open('function_with_modified_globals.pk', 'wb') as fid: + cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) - # finally, re-load the dynamic module - new_f = pickle.loads(fileobj) + child_process_code = """ + import pickle - assert first_f() == new_f() + with open('first_function.pk','rb') as fid: + first_f = pickle.load(fid) + # at this point, a module called 'mod' should exist in + # _dynamic_modules. further function loading + # will use the globals living in mod + + assert first_f() == 1 + + # load a function with initial global variable x set to 2 + with open('function_with_modified_globals.pk','rb') as fid: + new_f = pickle.load(fid) + + # assert the initial global got overridden by + # _dynamic_modules + assert new_f()==1 + + # both function's global x should point to the + # same variable. calling first_f('test_value') + # will change this variable, and new_f() should + # return the changed variable + assert first_f('test_value') == 'test_value' + assert new_f() == 'test_value' + """ + + # finally, we execute the code + assert_run_python_script(textwrap.dedent(child_process_code)) + + finally: + # remove the created files + os.unlink('first_function.pk') + os.unlink('function_with_modified_globals.pk') @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") From cbbb1e11b354db45690e7def9a929848cb55149b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 18:31:18 +0200 Subject: [PATCH 10/18] typos --- tests/cloudpickle_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 742164d23..75446a8f9 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -929,10 +929,10 @@ def test_function_from_dynamic_module_with_globals_modifications(self): this test verifies that: - any modification in the global variables of a dynamic module living in a child process won't get overridden - when new object are unpickled in the child's interpreter + when new objects are unpickled in the child's interpreter - vice versa, e.g a modification in the parent process does not - override the value of the variable in the child process + override the value of the variables in the child process The two cases are equivalent, and here, the second case is tested. """ @@ -969,6 +969,7 @@ def func_that_relies_on_dynamic_module(v=None): # change the mod's global variable x mod.x = 2 + # at this point, mod.func_that_relies_on_dynamic_module() # returns 2 assert mod.func_that_relies_on_dynamic_module() == 2 From 71dc5a3186f293f4940eecf4ab1856967ce8a506 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 18:35:08 +0200 Subject: [PATCH 11/18] simpler code when checking base_globals --- cloudpickle/cloudpickle.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e7b7290dd..30e48b37f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1099,10 +1099,8 @@ def _make_skel_func(code, cell_count, base_globals=None): # this checks if we can import the previous environment the object # lived in base_globals = vars(sys.modules[base_globals_name]) - elif base_globals_name in _dynamic_modules.keys(): - base_globals = _dynamic_modules[base_globals_name] else: - base_globals = {} + base_globals = _dynamic_modules.get(base_globals_name, {}) _dynamic_modules[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ From 5dc9f147ba7cd2e0bcbffd506b7b10b3b7e495b6 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 10 Sep 2018 18:37:50 +0200 Subject: [PATCH 12/18] changed _dynamic_modules to _dynamic_modules_globals --- cloudpickle/cloudpickle.py | 6 +++--- tests/cloudpickle_test.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 30e48b37f..79404c28f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,7 +79,7 @@ # caches dynamic modules that are not referenced in sys.modules -_dynamic_modules = {} +_dynamic_modules_globals = {} def _make_cell_set_template_code(): @@ -1100,8 +1100,8 @@ def _make_skel_func(code, cell_count, base_globals=None): # lived in base_globals = vars(sys.modules[base_globals_name]) else: - base_globals = _dynamic_modules.get(base_globals_name, {}) - _dynamic_modules[base_globals_name] = base_globals + base_globals = _dynamic_modules_globals.get(base_globals_name, {}) + _dynamic_modules_globals[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 75446a8f9..7dd994ac1 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -983,7 +983,7 @@ def func_that_relies_on_dynamic_module(v=None): first_f = pickle.load(fid) # at this point, a module called 'mod' should exist in - # _dynamic_modules. further function loading + # _dynamic_modules_globals. further function loading # will use the globals living in mod assert first_f() == 1 @@ -993,7 +993,7 @@ def func_that_relies_on_dynamic_module(v=None): new_f = pickle.load(fid) # assert the initial global got overridden by - # _dynamic_modules + # _dynamic_modules_globals assert new_f()==1 # both function's global x should point to the From 953563659b9112b0ce164696f1c15fb230210c6c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 11 Sep 2018 14:26:13 +0200 Subject: [PATCH 13/18] reverted dynamic modules behavior dynamic modules globals are not stored in a dict. Each time a function from a dynamic module gets unpickled, a new set of globals is created --- CHANGES.md | 5 +++-- cloudpickle/cloudpickle.py | 11 +++------- tests/cloudpickle_test.py | 42 ++++++++++---------------------------- 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index d9743787c..f20135469 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,7 +1,9 @@ - master ====== +- global variables from modules referenced in sys.modules in the child process + now overrides the initial global variables of the pickled function + 0.5.5 ===== @@ -19,7 +21,6 @@ master variables ([issue #187]( https://github.com/cloudpipe/cloudpickle/issues/187)). - 0.5.3 ===== - Fixed a crash in Python 2 when serializing non-hashable instancemethods of built-in diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 79404c28f..987f6580f 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -78,9 +78,6 @@ PY3 = True -# caches dynamic modules that are not referenced in sys.modules -_dynamic_modules_globals = {} - def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -1094,14 +1091,12 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): - base_globals_name = base_globals - if base_globals_name in sys.modules.keys(): + if sys.modules.get(base_globals, None) is not None: # this checks if we can import the previous environment the object # lived in - base_globals = vars(sys.modules[base_globals_name]) + base_globals = vars(sys.modules[base_globals]) else: - base_globals = _dynamic_modules_globals.get(base_globals_name, {}) - _dynamic_modules_globals[base_globals_name] = base_globals + base_globals = {} base_globals['__builtins__'] = __builtins__ diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 7dd994ac1..0e94b38cc 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -926,25 +926,19 @@ def f1(): def test_function_from_dynamic_module_with_globals_modifications(self): """ - this test verifies that: - - any modification in the global variables of a dynamic - module living in a child process won't get overridden - when new objects are unpickled in the child's interpreter - - - vice versa, e.g a modification in the parent process does not - override the value of the variables in the child process - - The two cases are equivalent, and here, the second case is tested. + unpickling functions coming from a dynamic module should lead to + new global variables being created each time. + Hence, a modification of the globals of one function should not + interact with the globals of another function. + This test validates this behavior. """ # first, we create a dynamic module in the parent process mod = imp.new_module('mod') code = ''' x = 1 - def func_that_relies_on_dynamic_module(v=None): + def func_that_relies_on_dynamic_module(): global x - if v is not None: - x = v return x ''' exec(textwrap.dedent(code), mod.__dict__) @@ -957,12 +951,8 @@ def func_that_relies_on_dynamic_module(v=None): # values for the global variable x. # a child process that sequentially unpickles the - # two functions is then launched - - # once the _first_ function gets unpickled, mod is created and - # tracked in the child environment. Whatever the global variable - # x's value in the second function, it will be overriden by the - # initial value of x in the child environment + # two functions is then launched, and asserts + # the independance of those two global variables with open('first_function.pk', 'wb') as fid: cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) @@ -982,26 +972,16 @@ def func_that_relies_on_dynamic_module(v=None): with open('first_function.pk','rb') as fid: first_f = pickle.load(fid) - # at this point, a module called 'mod' should exist in - # _dynamic_modules_globals. further function loading - # will use the globals living in mod - assert first_f() == 1 # load a function with initial global variable x set to 2 with open('function_with_modified_globals.pk','rb') as fid: new_f = pickle.load(fid) - # assert the initial global got overridden by - # _dynamic_modules_globals - assert new_f()==1 + # assert that by first_f's global variable x does not interact + # with new_f's global variable x + assert new_f()==2 - # both function's global x should point to the - # same variable. calling first_f('test_value') - # will change this variable, and new_f() should - # return the changed variable - assert first_f('test_value') == 'test_value' - assert new_f() == 'test_value' """ # finally, we execute the code From da966e455efdea27b0128dc848f26a909b1dd9e8 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 11 Sep 2018 14:30:26 +0200 Subject: [PATCH 14/18] referenced the corresponding issues in CHANGES.md --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index f20135469..feef74e96 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,8 @@ master ====== - global variables from modules referenced in sys.modules in the child process - now overrides the initial global variables of the pickled function + now overrides the initial global variables of the pickled function. + ([issue #187](https://github.com/cloudpipe/cloudpickle/issues/187)) 0.5.5 From 89806103a30659faf2fe408c5799ff9da4ae4507 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 12 Sep 2018 14:43:06 +0200 Subject: [PATCH 15/18] removed test_function_from_dynamic_module... also updated CHANGES.md --- CHANGES.md | 4 +-- cloudpickle/cloudpickle.py | 1 - tests/cloudpickle_test.py | 69 +------------------------------------- 3 files changed, 3 insertions(+), 71 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index feef74e96..d1a83cb95 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,8 @@ master ====== -- global variables from modules referenced in sys.modules in the child process - now overrides the initial global variables of the pickled function. + - Ensure that unpickling a locally defined function that accesses the global variables + of a module does not reset the values of the global variables if they are already initialized. ([issue #187](https://github.com/cloudpipe/cloudpickle/issues/187)) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 987f6580f..842723539 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -78,7 +78,6 @@ PY3 = True - def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0e94b38cc..3ad002958 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -893,6 +893,7 @@ def f1(): def test_closure_interacting_with_a_global_variable(self): global _TEST_GLOBAL_VARIABLE + assert _TEST_GLOBAL_VARIABLE == "default_value" orig_value = _TEST_GLOBAL_VARIABLE try: def f0(): @@ -924,74 +925,6 @@ def f1(): finally: _TEST_GLOBAL_VARIABLE = orig_value - def test_function_from_dynamic_module_with_globals_modifications(self): - """ - unpickling functions coming from a dynamic module should lead to - new global variables being created each time. - Hence, a modification of the globals of one function should not - interact with the globals of another function. - This test validates this behavior. - """ - - # first, we create a dynamic module in the parent process - mod = imp.new_module('mod') - code = ''' - x = 1 - def func_that_relies_on_dynamic_module(): - global x - return x - ''' - exec(textwrap.dedent(code), mod.__dict__) - - try: - # simple sanity check on the function's output - assert mod.func_that_relies_on_dynamic_module() == 1 - - # the function of mod is pickled two times, with two different - # values for the global variable x. - - # a child process that sequentially unpickles the - # two functions is then launched, and asserts - # the independance of those two global variables - - with open('first_function.pk', 'wb') as fid: - cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) - - # change the mod's global variable x - mod.x = 2 - - # at this point, mod.func_that_relies_on_dynamic_module() - # returns 2 - assert mod.func_that_relies_on_dynamic_module() == 2 - with open('function_with_modified_globals.pk', 'wb') as fid: - cloudpickle.dump(mod.func_that_relies_on_dynamic_module, fid) - - child_process_code = """ - import pickle - - with open('first_function.pk','rb') as fid: - first_f = pickle.load(fid) - - assert first_f() == 1 - - # load a function with initial global variable x set to 2 - with open('function_with_modified_globals.pk','rb') as fid: - new_f = pickle.load(fid) - - # assert that by first_f's global variable x does not interact - # with new_f's global variable x - assert new_f()==2 - - """ - - # finally, we execute the code - assert_run_python_script(textwrap.dedent(child_process_code)) - - finally: - # remove the created files - os.unlink('first_function.pk') - os.unlink('function_with_modified_globals.pk') - @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self): From eddb2f859af3d477ef4c17a8cdacd6b36dc5f4c8 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 12 Sep 2018 16:08:48 +0200 Subject: [PATCH 16/18] added a test to check if pickled dynamic modules stay dynamic --- tests/cloudpickle_test.py | 60 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 3ad002958..6e512084e 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -440,6 +440,66 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) + def test_load_dynamic_module_in_grandchild_process(self): + # make sure that a when loaded, a dynamic module + # preserves its dynamic property. Otherwise, + # this will lead to an import error if pickled in the child process + # and reloaded in another one + + # we create a new dynamic module + mod = imp.new_module('mod') + code = ''' + x = 1 + ''' + exec(textwrap.dedent(code), mod.__dict__) + + # this script will be ran in a separate child process + + # it will import the pickled dynamic module, and then + # re-pickle it under a new name. + # finally, it will create a child process that will + # load the re-pickled dynamic module + child_process_script = ''' + import pickle + import textwrap + + import cloudpickle + from testutils import assert_run_python_script + + child_of_child_process_script = {} + + with open('dynamic_module_from_parent_process.pk', 'rb') as fid: + mod = pickle.load(fid) + + with open('dynamic_module_from_child_process.pk', 'wb') as fid: + cloudpickle.dump(mod, fid) + + assert_run_python_script(textwrap.dedent(child_of_child_process_script)) + ''' + + # the script ran by the process created by the child process + child_of_child_process_script = ''' \'\'\' + import pickle + with open('dynamic_module_from_child_process.pk','rb') as fid: + mod = pickle.load(fid) + \'\'\' ''' + + child_process_script = child_process_script.format( + child_of_child_process_script) + + try: + with open('dynamic_module_from_parent_process.pk', 'wb') as fid: + cloudpickle.dump(mod, fid) + + assert_run_python_script(textwrap.dedent(child_process_script)) + + finally: + # remove temporary created files + if os.path.exists('dynamic_module_from_parent_process.pk'): + os.unlink('dynamic_module_from_parent_process.pk') + if os.path.exists('dynamic_module_from_child_process.pk'): + os.unlink('dynamic_module_from_child_process.pk') + def test_find_module(self): import pickle # ensure this test is decoupled from global imports _find_module('pickle') From 8c290b7dd83cc379df5e8dd0db051c411533aad0 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 12 Sep 2018 17:10:39 +0200 Subject: [PATCH 17/18] formatting --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6e512084e..dfb8dd9a8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -454,7 +454,6 @@ def test_load_dynamic_module_in_grandchild_process(self): exec(textwrap.dedent(code), mod.__dict__) # this script will be ran in a separate child process - # it will import the pickled dynamic module, and then # re-pickle it under a new name. # finally, it will create a child process that will @@ -466,6 +465,7 @@ def test_load_dynamic_module_in_grandchild_process(self): import cloudpickle from testutils import assert_run_python_script + child_of_child_process_script = {} with open('dynamic_module_from_parent_process.pk', 'rb') as fid: From 7bc6a828f2d0fa578f1a0c107d53fdf660d00d80 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 12 Sep 2018 19:27:11 +0200 Subject: [PATCH 18/18] final formatting reworks --- tests/cloudpickle_test.py | 60 ++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index dfb8dd9a8..41f185851 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -441,23 +441,23 @@ def method(self, x): self.assertEqual(id(mod1), id(mod2)) def test_load_dynamic_module_in_grandchild_process(self): - # make sure that a when loaded, a dynamic module - # preserves its dynamic property. Otherwise, - # this will lead to an import error if pickled in the child process - # and reloaded in another one + # Make sure that when loaded, a dynamic module preserves its dynamic + # property. Otherwise, this will lead to an ImportError if pickled in + # the child process and reloaded in another one. - # we create a new dynamic module + # We create a new dynamic module mod = imp.new_module('mod') code = ''' x = 1 ''' exec(textwrap.dedent(code), mod.__dict__) - # this script will be ran in a separate child process - # it will import the pickled dynamic module, and then - # re-pickle it under a new name. - # finally, it will create a child process that will - # load the re-pickled dynamic module + # This script will be ran in a separate child process. It will import + # the pickled dynamic module, and then re-pickle it under a new name. + # Finally, it will create a child process that will load the re-pickled + # dynamic module. + parent_process_module_file = 'dynamic_module_from_parent_process.pkl' + child_process_module_file = 'dynamic_module_from_child_process.pkl' child_process_script = ''' import pickle import textwrap @@ -466,39 +466,47 @@ def test_load_dynamic_module_in_grandchild_process(self): from testutils import assert_run_python_script - child_of_child_process_script = {} + child_of_child_process_script = {child_of_child_process_script} - with open('dynamic_module_from_parent_process.pk', 'rb') as fid: - mod = pickle.load(fid) + with open('{parent_process_module_file}', 'rb') as f: + mod = pickle.load(f) - with open('dynamic_module_from_child_process.pk', 'wb') as fid: - cloudpickle.dump(mod, fid) + with open('{child_process_module_file}', 'wb') as f: + cloudpickle.dump(mod, f) assert_run_python_script(textwrap.dedent(child_of_child_process_script)) ''' - # the script ran by the process created by the child process - child_of_child_process_script = ''' \'\'\' + # The script ran by the process created by the child process + child_of_child_process_script = """ ''' import pickle - with open('dynamic_module_from_child_process.pk','rb') as fid: + with open('{child_process_module_file}','rb') as fid: mod = pickle.load(fid) - \'\'\' ''' + ''' """ + + # Filling the two scripts with the pickled modules filepaths and, + # for the first child process, the script to be executed by its + # own child process. + child_of_child_process_script = child_of_child_process_script.format( + child_process_module_file=child_process_module_file) child_process_script = child_process_script.format( - child_of_child_process_script) + parent_process_module_file=parent_process_module_file, + child_process_module_file=child_process_module_file, + child_of_child_process_script=child_of_child_process_script) try: - with open('dynamic_module_from_parent_process.pk', 'wb') as fid: + with open(parent_process_module_file, 'wb') as fid: cloudpickle.dump(mod, fid) assert_run_python_script(textwrap.dedent(child_process_script)) finally: - # remove temporary created files - if os.path.exists('dynamic_module_from_parent_process.pk'): - os.unlink('dynamic_module_from_parent_process.pk') - if os.path.exists('dynamic_module_from_child_process.pk'): - os.unlink('dynamic_module_from_child_process.pk') + # Remove temporary created files + if os.path.exists(parent_process_module_file): + os.unlink(parent_process_module_file) + if os.path.exists(child_process_module_file): + os.unlink(child_process_module_file) def test_find_module(self): import pickle # ensure this test is decoupled from global imports