From 517ca196fcb95d5ff5e1e9610d47b12dbbb08455 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 29 Jan 2019 17:04:33 +0100 Subject: [PATCH 01/21] globals shipped with func override existing ones --- cloudpickle/cloudpickle.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index f94e4c109..e4e62d4e6 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1096,10 +1096,16 @@ def _fill_function(*args): else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) - # Only set global variables that do not exist. - for k, v in state['globals'].items(): - if k not in func.__globals__: - func.__globals__[k] = v + # - At pickling time, any dynamic global variable used by func is + # serialized by value (in state['globals']). + # - At unpickling time, func's __globals__ will first mirror the globals + # already existing in func's new module (predicted from func's __module__ + # attribute), and then be updated with state['globals']. + + # That means that if any collision happens, the serialized global variables + # shipped with func will always override the globals already existing in + # func's new environment + func.__globals__.update(state['globals']) func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] From 21adb870ba60414f832400336acaaec493e0ca66 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 29 Jan 2019 17:06:06 +0100 Subject: [PATCH 02/21] TST update tests using globals accordingly --- tests/cloudpickle_test.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 6091fb8bf..0c5322b67 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1085,6 +1085,8 @@ def f1(): cloned_f0 = {clone_func}(f0, protocol={protocol}) cloned_f1 = {clone_func}(f1, protocol={protocol}) + + # pickled_f1 contains VARIABLE's current value ("default_value") in it. pickled_f1 = dumps(f1, protocol={protocol}) # Change the value of the global variable @@ -1094,9 +1096,13 @@ def f1(): result_f1 = cloned_f1() assert result_f1 == "changed_by_f0", result_f1 - # Ensure that unpickling the global variable does not change its value + # Each time a function f is depickled, cloudpickle sets the values of + # its globals (that are potentially shared with other functions present + # in f's new environment) to the ones stored in f's pickle string. + # Thus, calling loads(pickled_f1) will reset VARIABLE from + # "changed_by_f0" to "default_value". result_pickled_f1 = loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + assert result_pickled_f1 == "default_value", result_pickled_f1 """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: code = code_template.format(protocol=self.protocol, @@ -1130,10 +1136,13 @@ def f1(): assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 assert f1() == result_cloned_f1 - # Ensure that unpickling the global variable does not change its - # value + # Each time a function f is depickled, cloudpickle sets the values + # of its globals (that are potentially shared with other functions + # present in f's new environment) to the one stored in f's pickle + # string. Thus, calling loads(pickled_f1) will reset VARIABLE from + # "changed_by_f0" to "default_value". result_pickled_f1 = cloudpickle.loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + assert result_pickled_f1 == "default_value", result_pickled_f1 finally: _TEST_GLOBAL_VARIABLE = orig_value @@ -1204,19 +1213,19 @@ def func_defined_in_dynamic_module(v=None): with open({with_modified_globals_file!r},'rb') as f: func_with_modified_globals = pickle.load(f) - # assert the this unpickling did not modify the value of - # the local - assert func_with_modified_globals() == 'initial value' + # assert that unpickling this function has overridden the + # previous value of GLOBAL_STATE + assert func_with_modified_globals() == 'changed value' # Update the value from the child process and check that - # unpickling again does not reset our change. + # unpickling again resets our change. assert func_with_initial_globals('new value') == 'new value' assert func_with_modified_globals() == 'new value' with open({with_initial_globals_file!r},'rb') as f: func_with_initial_globals = pickle.load(f) - assert func_with_initial_globals() == 'new value' - assert func_with_modified_globals() == 'new value' + assert func_with_initial_globals() == 'initial value' + assert func_with_modified_globals() == 'initial value' """.format( with_initial_globals_file=_escape(with_initial_globals_file), with_modified_globals_file=_escape(with_modified_globals_file)) From 2165d4089e7096b21679c056d1a67191847c7130 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 29 Jan 2019 17:06:35 +0100 Subject: [PATCH 03/21] DOC update CHANGES.md --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 76d380915..ecd3407aa 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,9 @@ - Add support for pickling interactively defined dataclasses. ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) +- Global variables contained in pickle strings will override existing + variables when loaded in their new environment. + 0.7.0 ===== From 4f92a845a141b026b44117c9cbad9fb242d6c5e2 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 30 Jan 2019 13:47:33 +0100 Subject: [PATCH 04/21] DOC clearer comments --- cloudpickle/cloudpickle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index e4e62d4e6..bfa9a6063 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1098,13 +1098,13 @@ def _fill_function(*args): # - At pickling time, any dynamic global variable used by func is # serialized by value (in state['globals']). - # - At unpickling time, func's __globals__ will first mirror the globals - # already existing in func's new module (predicted from func's __module__ - # attribute), and then be updated with state['globals']. + # - At unpickling time, func's __globals__ attribute is initialized by + # first retrieving the globals namespace from func's module by looking up + # its __module__ attribute and then updated with state['globals']. # That means that if any collision happens, the serialized global variables # shipped with func will always override the globals already existing in - # func's new environment + # func's new environment. func.__globals__.update(state['globals']) func.__defaults__ = state['defaults'] From d07ccb9b27ab1424ad239352180f0adfdcac1800 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 30 Jan 2019 13:48:31 +0100 Subject: [PATCH 05/21] DOC update CHANGES.md --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index ecd3407aa..bc2498990 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,8 @@ ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) - Global variables contained in pickle strings will override existing - variables when loaded in their new environment. + variables when loaded in their new environment. This restores the (previously + unstested) behavior of cloudpickle prior to changes done in 0.6 and 0.6.1. 0.7.0 From 0f195ee028008be40c1659640832e4053819e2c8 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 30 Jan 2019 13:51:36 +0100 Subject: [PATCH 06/21] DOC typo --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index bc2498990..c0b6f3e82 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,7 +6,7 @@ - Global variables contained in pickle strings will override existing variables when loaded in their new environment. This restores the (previously - unstested) behavior of cloudpickle prior to changes done in 0.6 and 0.6.1. + untested) behavior of cloudpickle prior to changes done in 0.6 and 0.6.1. 0.7.0 From 0519ee1d5eb12c480fa3bbb0f43e531b5bfb9a0b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 30 Jan 2019 17:46:35 +0100 Subject: [PATCH 07/21] DOC mention the 0.5.4 change --- CHANGES.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index c0b6f3e82..751b3b58d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,7 +6,9 @@ - Global variables contained in pickle strings will override existing variables when loaded in their new environment. This restores the (previously - untested) behavior of cloudpickle prior to changes done in 0.6 and 0.6.1. + untested) behavior of cloudpickle prior to changes done in 0.5.4 for + functions defined in the `__main__` module, and 0.6.0/1 for other dynamic + functions. 0.7.0 From 6ab9b9b4aad09955c67f171fd5516736c033a238 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 4 Feb 2019 20:33:53 +0100 Subject: [PATCH 08/21] specify future backward-compatibilty code --- cloudpickle/cloudpickle.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index bfa9a6063..b1a7745ea 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1146,6 +1146,9 @@ def _make_skel_func(code, cell_count, base_globals=None): if base_globals is None: base_globals = {} elif isinstance(base_globals, str): + # Backward compatibility for cloudpickle from 0.5.4 to 0.6.1, in which + # the globals of a depickled function were retrieved (if possible) by + # using the globals of the module with name base_globals base_globals_name = base_globals try: # First try to reuse the globals from the module containing the From 3dbf2e1ce54d774e5bff935aa52464736c66e3fa Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 4 Feb 2019 20:55:14 +0100 Subject: [PATCH 09/21] flag backward compat code for dynamic modules --- cloudpickle/cloudpickle.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b1a7745ea..c39305578 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,6 +79,9 @@ PY3 = True +# Backward compatibility for cloudpickle from 0.5.4 to 0.6.1, in which +# functions from the same dynamic module would share the same globals in their +# depickling environment. # Container for the global namespace to ensure consistent unpickling of # functions defined in dynamic modules (modules not registed in sys.modules). _dynamic_modules_globals = weakref.WeakValueDictionary() From 8ee5ac4f71fa72759e334ec988d1ef84261d2bec Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 6 Feb 2019 17:18:42 +0100 Subject: [PATCH 10/21] wip --- cloudpickle/cloudpickle.py | 21 +++++----- tests/cloudpickle_test.py | 81 ++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 44 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c39305578..da5e39b63 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -673,16 +673,17 @@ def extract_func_data(self, func): # save the dict dct = func.__dict__ - base_globals = self.globals_ref.get(id(func.__globals__), None) - if base_globals is None: - # For functions defined in a well behaved module use - # vars(func.__module__) for base_globals. This is necessary to - # share the global variables across multiple pickled functions from - # this module. - if func.__module__ is not None: - base_globals = func.__module__ - else: - base_globals = {} + # base_globals represents the future global namespace of func at + # unpickling time. Looking it up and storing it in globals_ref allow + # functions sharing the same globals at pickling time to also + # share them at unpickling time, at one condition: since globals_ref is + # an attribute of a Cloudpickler instance, and that a new Pickler is + # created each time pickle.dump or pickle.dumps is called, functions + # also need to be saved within the same invokation of + # pickle.dump/pickle.dumps (for example: pickle.dumps([f1, f2])). There + # is no such limitation when using Cloudpickler.dump, as long as the + # multiple invokations are bound to the same Cloudpickler. + base_globals = self.globals_ref.get(id(func.__globals__), {}) self.globals_ref[id(func.__globals__)] = base_globals return (code, f_globals, defaults, closure, dct, base_globals) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0c5322b67..810b18054 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1083,26 +1083,33 @@ def f0(): def f1(): return VARIABLE - cloned_f0 = {clone_func}(f0, protocol={protocol}) - cloned_f1 = {clone_func}(f1, protocol={protocol}) + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}) - # pickled_f1 contains VARIABLE's current value ("default_value") in it. + # pickle f1 another time, but in a new pickle string pickled_f1 = dumps(f1, protocol={protocol}) - # Change the value of the global variable + # Change the value of the global variable in f0's new global namespace cloned_f0() - # Ensure that the global variable is the same for another function - result_f1 = cloned_f1() - assert result_f1 == "changed_by_f0", result_f1 - - # Each time a function f is depickled, cloudpickle sets the values of - # its globals (that are potentially shared with other functions present - # in f's new environment) to the ones stored in f's pickle string. - # Thus, calling loads(pickled_f1) will reset VARIABLE from - # "changed_by_f0" to "default_value". - result_pickled_f1 = loads(pickled_f1)() - assert result_pickled_f1 == "default_value", result_pickled_f1 + # depickling f0 and f1 should not affect the globals of already + # existing modules + assert VARIABLE == "default_value", VARIABLE + + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 and + # f0 shared the same globals at pickling time, and cloned_f1 was + # depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share the + # same globals as the latter two. + cloned_f1_with_new_globals = loads(pickled_f1) + + # get the value of cloned_f1_with_new_globals's VARIABLE + new_global_var = cloned_f1_with_new_globals() + assert new_global_var == "default_value", new_global_var """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: code = code_template.format(protocol=self.protocol, @@ -1121,28 +1128,34 @@ def f0(): def f1(): return _TEST_GLOBAL_VARIABLE - cloned_f0 = cloudpickle.loads(cloudpickle.dumps( - f0, protocol=self.protocol)) - cloned_f1 = cloudpickle.loads(cloudpickle.dumps( - f1, protocol=self.protocol)) + # pickle f0 and f1 inside the same pickle_string + cloned_f0, cloned_f1 = pickle_depickle([f0, f1], + protocol=self.protocol) + + # pickle f1 another time, but in a new pickle string pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) - # Change the value of the global variable + # Change the global variable's value in f0's new global namespace cloned_f0() - assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" - - # Ensure that the global variable is the same for another function - result_cloned_f1 = cloned_f1() - assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 - assert f1() == result_cloned_f1 - - # Each time a function f is depickled, cloudpickle sets the values - # of its globals (that are potentially shared with other functions - # present in f's new environment) to the one stored in f's pickle - # string. Thus, calling loads(pickled_f1) will reset VARIABLE from - # "changed_by_f0" to "default_value". - result_pickled_f1 = cloudpickle.loads(pickled_f1)() - assert result_pickled_f1 == "default_value", result_pickled_f1 + + # depickling f0 and f1 should not affect the globals of already + # existing modules + assert _TEST_GLOBAL_VARIABLE == "default_value" + + # Ensure that cloned_f1 and cloned_f0 share thq same globals, as f1 + # and f0 shared the same globals at pickling time, and cloned_f1 + # was depickled from the same pickle string as cloned_f0 + shared_global_var = cloned_f1() + assert shared_global_var == "changed_by_f0", shared_global_var + + # f1 is unpickled another time, but because it comes from another + # pickle string than pickled_f1 and pickled_f0, it will not share + # the same globals as the latter two. + cloned_f1_with_new_globals = cloudpickle.loads(pickled_f1) + + # get the value of cloned_f1_with_new_globals's VARIABLE + new_global_var = cloned_f1_with_new_globals() + assert new_global_var == "default_value", new_global_var finally: _TEST_GLOBAL_VARIABLE = orig_value From f8a0e39d2dfeb5ee8920b9f8d41b37a1006650ba Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 6 Feb 2019 18:55:05 +0100 Subject: [PATCH 11/21] make use of dict.setdefault for globals_ref --- cloudpickle/cloudpickle.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index da5e39b63..08f351eae 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -683,8 +683,7 @@ def extract_func_data(self, func): # pickle.dump/pickle.dumps (for example: pickle.dumps([f1, f2])). There # is no such limitation when using Cloudpickler.dump, as long as the # multiple invokations are bound to the same Cloudpickler. - base_globals = self.globals_ref.get(id(func.__globals__), {}) - self.globals_ref[id(func.__globals__)] = base_globals + base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) return (code, f_globals, defaults, closure, dct, base_globals) From 75ca9af94b68d79535f59ec58f78120c08bed9dd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 6 Feb 2019 18:55:50 +0100 Subject: [PATCH 12/21] TST remove user-defined dynamic modules tests --- tests/cloudpickle_test.py | 148 ++------------------------------------ 1 file changed, 5 insertions(+), 143 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 810b18054..1b093865b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -451,57 +451,6 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) - def test_dynamic_modules_globals(self): - # _dynamic_modules_globals is a WeakValueDictionary, so if a value - # in this dict (containing a set of global variables from a dynamic - # module created in the parent process) has no other reference than in - # this dict in the child process, it will be garbage collected. - - # We first create a module - mod = types.ModuleType('mod') - code = ''' - x = 1 - def func(): - return - ''' - exec(textwrap.dedent(code), mod.__dict__) - - pickled_module_path = os.path.join(self.tmpdir, 'mod_f.pkl') - child_process_script = ''' - import pickle - from cloudpickle.cloudpickle import _dynamic_modules_globals - import gc - with open("{pickled_module_path}", 'rb') as f: - func = pickle.load(f) - - # A dictionnary storing the globals of the newly unpickled function - # should have been created - assert list(_dynamic_modules_globals.keys()) == ['mod'] - - # func.__globals__ is the only non-weak reference to - # _dynamic_modules_globals['mod']. By deleting func, we delete also - # _dynamic_modules_globals['mod'] - del func - gc.collect() - - # There is no reference to the globals of func since func has been - # deleted and _dynamic_modules_globals is a WeakValueDictionary, - # so _dynamic_modules_globals should now be empty - assert list(_dynamic_modules_globals.keys()) == [] - ''' - - child_process_script = child_process_script.format( - pickled_module_path=_escape(pickled_module_path)) - - try: - with open(pickled_module_path, 'wb') as f: - cloudpickle.dump(mod.func, f, protocol=self.protocol) - - assert_run_python_script(textwrap.dedent(child_process_script)) - - finally: - os.unlink(pickled_module_path) - def test_module_locals_behavior(self): # Makes sure that a local function defined in another module is # correctly serialized. This notably checks that the globals are @@ -1151,103 +1100,16 @@ def f1(): # f1 is unpickled another time, but because it comes from another # pickle string than pickled_f1 and pickled_f0, it will not share # the same globals as the latter two. - cloned_f1_with_new_globals = cloudpickle.loads(pickled_f1) + new_cloned_f1 = pickle.loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ - # get the value of cloned_f1_with_new_globals's VARIABLE - new_global_var = cloned_f1_with_new_globals() + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() assert new_global_var == "default_value", new_global_var finally: _TEST_GLOBAL_VARIABLE = orig_value - def test_function_from_dynamic_module_with_globals_modifications(self): - # This test verifies that the global variable state of a function - # defined in a dynamic module in a child process are not reset by - # subsequent uplickling. - - # first, we create a dynamic module in the parent process - mod = types.ModuleType('mod') - code = ''' - GLOBAL_STATE = "initial value" - - def func_defined_in_dynamic_module(v=None): - global GLOBAL_STATE - if v is not None: - GLOBAL_STATE = v - return GLOBAL_STATE - ''' - exec(textwrap.dedent(code), mod.__dict__) - - with_initial_globals_file = os.path.join( - self.tmpdir, 'function_with_initial_globals.pkl') - with_modified_globals_file = os.path.join( - self.tmpdir, 'function_with_modified_globals.pkl') - - try: - # Simple sanity check on the function's output - assert mod.func_defined_in_dynamic_module() == "initial value" - - # The function of mod is pickled two times, with two different - # values for the global variable GLOBAL_STATE. - # Then we launch a child process that sequentially unpickles the - # two functions. Those unpickle functions should share the same - # global variables in the child process: - # Once the first function gets unpickled, mod is created and - # tracked in the child environment. This is state is preserved - # when unpickling the second function whatever the global variable - # GLOBAL_STATE's value at the time of pickling. - - with open(with_initial_globals_file, 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f) - - # Change the mod's global variable - mod.GLOBAL_STATE = 'changed value' - - # At this point, mod.func_defined_in_dynamic_module() - # returns the updated value. Let's pickle it again. - assert mod.func_defined_in_dynamic_module() == 'changed value' - with open(with_modified_globals_file, 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f, - protocol=self.protocol) - - child_process_code = """ - import pickle - - with open({with_initial_globals_file!r},'rb') as f: - func_with_initial_globals = pickle.load(f) - - # At this point, a module called 'mod' should exist in - # _dynamic_modules_globals. Further function loading - # will use the globals living in mod. - - assert func_with_initial_globals() == 'initial value' - - # Load a function with initial global variable that was - # pickled after a change in the global variable - with open({with_modified_globals_file!r},'rb') as f: - func_with_modified_globals = pickle.load(f) - - # assert that unpickling this function has overridden the - # previous value of GLOBAL_STATE - assert func_with_modified_globals() == 'changed value' - - # Update the value from the child process and check that - # unpickling again resets our change. - assert func_with_initial_globals('new value') == 'new value' - assert func_with_modified_globals() == 'new value' - - with open({with_initial_globals_file!r},'rb') as f: - func_with_initial_globals = pickle.load(f) - assert func_with_initial_globals() == 'initial value' - assert func_with_modified_globals() == 'initial value' - """.format( - with_initial_globals_file=_escape(with_initial_globals_file), - with_modified_globals_file=_escape(with_modified_globals_file)) - assert_run_python_script(textwrap.dedent(child_process_code)) - - finally: - os.unlink(with_initial_globals_file) - os.unlink(with_modified_globals_file) - @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") def test_function_pickle_compat_0_4_0(self): From f4c68ae4983fec8ef676fba9e68601cd17331216 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 6 Feb 2019 18:56:37 +0100 Subject: [PATCH 13/21] TST comments rewording and a few more assertions --- tests/cloudpickle_test.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 1b093865b..4cf62ba82 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1032,17 +1032,24 @@ def f0(): def f1(): return VARIABLE + assert f0.__globals__ is f1.__globals__ + # pickle f0 and f1 inside the same pickle_string cloned_f0, cloned_f1 = {clone_func}([f0, f1], protocol={protocol}) + # cloned_f0 and cloned_f1 now share a global namespace that is isolated + # from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + # pickle f1 another time, but in a new pickle string pickled_f1 = dumps(f1, protocol={protocol}) # Change the value of the global variable in f0's new global namespace cloned_f0() - # depickling f0 and f1 should not affect the globals of already - # existing modules + # thanks to cloudpickle isolation, depickling and calling f0 and f1 + # should not affect the globals of already existing modules assert VARIABLE == "default_value", VARIABLE # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 and @@ -1054,10 +1061,12 @@ def f1(): # f1 is unpickled another time, but because it comes from another # pickle string than pickled_f1 and pickled_f0, it will not share the # same globals as the latter two. - cloned_f1_with_new_globals = loads(pickled_f1) + new_cloned_f1 = loads(pickled_f1) + assert new_cloned_f1.__globals__ is not cloned_f1.__globals__ + assert new_cloned_f1.__globals__ is not f1.__globals__ - # get the value of cloned_f1_with_new_globals's VARIABLE - new_global_var = cloned_f1_with_new_globals() + # get the value of new_cloned_f1's VARIABLE + new_global_var = new_cloned_f1() assert new_global_var == "default_value", new_global_var """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: @@ -1081,6 +1090,11 @@ def f1(): cloned_f0, cloned_f1 = pickle_depickle([f0, f1], protocol=self.protocol) + # cloned_f0 and cloned_f1 now share a global namespace that is + # isolated from any previously existing namespace + assert cloned_f0.__globals__ is cloned_f1.__globals__ + assert cloned_f0.__globals__ is not f0.__globals__ + # pickle f1 another time, but in a new pickle string pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) @@ -1091,7 +1105,7 @@ def f1(): # existing modules assert _TEST_GLOBAL_VARIABLE == "default_value" - # Ensure that cloned_f1 and cloned_f0 share thq same globals, as f1 + # Ensure that cloned_f1 and cloned_f0 share the same globals, as f1 # and f0 shared the same globals at pickling time, and cloned_f1 # was depickled from the same pickle string as cloned_f0 shared_global_var = cloned_f1() From 178b6aab5e948610a6574a96968874c3e866c95c Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 7 Feb 2019 10:51:36 +0100 Subject: [PATCH 14/21] Apply suggestions from code review Co-Authored-By: pierreglaser --- cloudpickle/cloudpickle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 08f351eae..554a614c7 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -676,11 +676,11 @@ def extract_func_data(self, func): # base_globals represents the future global namespace of func at # unpickling time. Looking it up and storing it in globals_ref allow # functions sharing the same globals at pickling time to also - # share them at unpickling time, at one condition: since globals_ref is + # share them once unpickled, at one condition: since globals_ref is # an attribute of a Cloudpickler instance, and that a new Pickler is # created each time pickle.dump or pickle.dumps is called, functions # also need to be saved within the same invokation of - # pickle.dump/pickle.dumps (for example: pickle.dumps([f1, f2])). There + # cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There # is no such limitation when using Cloudpickler.dump, as long as the # multiple invokations are bound to the same Cloudpickler. base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) From 9615db0e62133fec3a7e67c034c66e58c8fbb302 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 7 Feb 2019 10:52:02 +0100 Subject: [PATCH 15/21] Apply suggestions from code review Co-Authored-By: pierreglaser --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 554a614c7..ffddb8b70 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -677,7 +677,7 @@ def extract_func_data(self, func): # unpickling time. Looking it up and storing it in globals_ref allow # functions sharing the same globals at pickling time to also # share them once unpickled, at one condition: since globals_ref is - # an attribute of a Cloudpickler instance, and that a new Pickler is + # an attribute of a Cloudpickler instance, and that a new CloudPickler is # created each time pickle.dump or pickle.dumps is called, functions # also need to be saved within the same invokation of # cloudpickle.dump/cloudpickle.dumps (for example: cloudpickle.dumps([f1, f2])). There From 8ca80665e5f9715048c9c0569b10bed34a6104db Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 11:25:42 +0100 Subject: [PATCH 16/21] MNT remove code for dynamic modules persistance --- cloudpickle/cloudpickle.py | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index ffddb8b70..a180adca4 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -79,25 +79,6 @@ PY3 = True -# Backward compatibility for cloudpickle from 0.5.4 to 0.6.1, in which -# functions from the same dynamic module would share the same globals in their -# depickling environment. -# Container for the global namespace to ensure consistent unpickling of -# functions defined in dynamic modules (modules not registed in sys.modules). -_dynamic_modules_globals = weakref.WeakValueDictionary() - - -class _DynamicModuleFuncGlobals(dict): - """Global variables referenced by a function defined in a dynamic module - - To avoid leaking references we store such context in a WeakValueDictionary - instance. However instances of python builtin types such as dict cannot - be used directly as values in such a construct, hence the need for a - derived class. - """ - pass - - def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -1146,24 +1127,8 @@ def _make_skel_func(code, cell_count, base_globals=None): code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ - if base_globals is None: + if base_globals is None or isinstance(base_globals, str): base_globals = {} - elif isinstance(base_globals, str): - # Backward compatibility for cloudpickle from 0.5.4 to 0.6.1, in which - # the globals of a depickled function were retrieved (if possible) by - # using the globals of the module with name base_globals - base_globals_name = base_globals - try: - # First try to reuse the globals from the module containing the - # function. If it is not possible to retrieve it, fallback to an - # empty dictionary. - base_globals = vars(importlib.import_module(base_globals)) - except ImportError: - base_globals = _dynamic_modules_globals.get( - base_globals_name, None) - if base_globals is None: - base_globals = _DynamicModuleFuncGlobals() - _dynamic_modules_globals[base_globals_name] = base_globals base_globals['__builtins__'] = __builtins__ From f4f15e35dc2819dd98c2f409a87d962da73cb06a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 11:56:26 +0100 Subject: [PATCH 17/21] MNT, DOC flag backward compatibility code --- cloudpickle/cloudpickle.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a180adca4..6dff794d4 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1127,6 +1127,9 @@ def _make_skel_func(code, cell_count, base_globals=None): code and the correct number of cells in func_closure. All other func attributes (e.g. func_globals) are empty. """ + # This is backward-compatibility code: for cloudpickle versions between + # 0.5.4 and 0.7, base_globals could be a string or None. base_globals + # should now always be a dictionary. if base_globals is None or isinstance(base_globals, str): base_globals = {} From 644199e45ad7fd1228b82bd19ae482e538fc68fb Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 14:08:41 +0100 Subject: [PATCH 18/21] DOC, MNT update namsespace populating comment --- cloudpickle/cloudpickle.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 6dff794d4..54d745cbc 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1083,12 +1083,12 @@ def _fill_function(*args): # - At pickling time, any dynamic global variable used by func is # serialized by value (in state['globals']). # - At unpickling time, func's __globals__ attribute is initialized by - # first retrieving the globals namespace from func's module by looking up - # its __module__ attribute and then updated with state['globals']. - - # That means that if any collision happens, the serialized global variables - # shipped with func will always override the globals already existing in - # func's new environment. + # first retrieving an empty isolated namespace that will be shared + # with other functions pickled from the same original module + # by the same CloudPickler instance and then updated with the + # content of state['globals'] to populate the shared isolated + # namespace with all the global variables that are specifically + # referenced for this function. func.__globals__.update(state['globals']) func.__defaults__ = state['defaults'] From 19db7fcf6dd76e7adad4b84131de0d467613c361 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 14:25:43 +0100 Subject: [PATCH 19/21] [ci downstream] trigger downstream projects builds From 45ceaed38d69b313ef316a4c746612efc0c70115 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 15:19:33 +0100 Subject: [PATCH 20/21] [ci downstream] trigger downstream builds From c207afca31949be39ae1ec56f60c324a8a6c3313 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 7 Feb 2019 15:44:20 +0100 Subject: [PATCH 21/21] DOC, MNT udpate stale changelog --- CHANGES.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 751b3b58d..160e0d68f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,12 +4,11 @@ - Add support for pickling interactively defined dataclasses. ([issue #245](https://github.com/cloudpipe/cloudpickle/pull/245)) -- Global variables contained in pickle strings will override existing - variables when loaded in their new environment. This restores the (previously - untested) behavior of cloudpickle prior to changes done in 0.5.4 for - functions defined in the `__main__` module, and 0.6.0/1 for other dynamic - functions. - +- Global variables referenced by functions pickled by cloudpickle are now + unpickled in a new and isolated namespace scoped by the CloudPickler + instance. This restores the (previously untested) behavior of cloudpickle + prior to changes done in 0.5.4 for functions defined in the `__main__` + module, and 0.6.0/1 for other dynamic functions. 0.7.0 =====