From 40bd3b33ef49cd79f9629b3f0c67b49e464629da Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Nov 2018 14:17:14 +0100 Subject: [PATCH 1/8] ENH add option to override existing globals --- cloudpickle/cloudpickle.py | 44 ++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 41638be9c..b27595bc5 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -269,13 +269,27 @@ class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() - def __init__(self, file, protocol=None): + def __init__(self, file, protocol=None, override_existing_globals=True): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__(self, file, protocol=protocol) # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} + # When unpickling a function created in a dynamic module or the __main__ + # module, its set of global variables may be different from the one from + # the module the function belongs to, for example if the dynamic module + # has had its variables mutated between pickling and unpickling time + # of the function. + + # - If set to True, the attribue override_existing_globals will + # override the global variables of the dynamic module with the ones + # from the pickled function at the function's unpickling time. + # - If set to False, the global variables from the pickled function + # will be overriden by the dynamic module global variables at the + # function's unpickling time + self.override_existing_globals = override_existing_globals + def dump(self, obj): self.inject_addons() try: @@ -596,6 +610,7 @@ def save_function_tuple(self, func): if hasattr(func, '__qualname__'): state['qualname'] = func.__qualname__ save(state) + save(self.override_existing_globals) write(pickle.TUPLE) write(pickle.REDUCE) # applies _fill_function on the tuple @@ -921,7 +936,7 @@ def _rebuild_tornado_coroutine(func): # Shorthands for legacy support -def dump(obj, file, protocol=None): +def dump(obj, file, protocol=None, override_existing_globals=True): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -931,10 +946,11 @@ def dump(obj, file, protocol=None): Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure compatibility with older versions of Python. """ - CloudPickler(file, protocol=protocol).dump(obj) + CloudPickler(file, protocol=protocol, + override_existing_globals=override_existing_globals).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, override_existing_globals=True): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -946,7 +962,8 @@ def dumps(obj, protocol=None): """ file = StringIO() try: - cp = CloudPickler(file, protocol=protocol) + cp = CloudPickler(file, protocol=protocol, + override_existing_globals=override_existing_globals) cp.dump(obj) return file.getvalue() finally: @@ -1060,9 +1077,14 @@ def _fill_function(*args): The skeleton itself is create by _make_skel_func(). """ + override_existing_globals = True if len(args) == 2: func = args[0] state = args[1] + elif len(args) == 3: + func = args[0] + state = args[1] + override_existing_globals = args[2] elif len(args) == 5: # Backwards compat for cloudpickle v0.4.0, after which the `module` # argument was introduced @@ -1078,10 +1100,14 @@ def _fill_function(*args): else: raise ValueError('Unexpected _fill_value arguments: %r' % (args,)) - # Only set global variables that do not exist. - for k, v in state['globals'].items(): - if k not in func.__globals__: - func.__globals__[k] = v + # This updates the variables of func's module using func's globals from the + # time at which it was pickled. + if override_existing_globals: + func.__globals__.update(state['globals']) + else: + for k, v in state['globals'].items(): + if k not in func.__globals__: + func.__globals__[k] = v func.__defaults__ = state['defaults'] func.__dict__ = state['dict'] From d249bafd4957ff8bd87ac3506d41a4c2a11962fd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 14 Nov 2018 15:35:53 +0100 Subject: [PATCH 2/8] Parametrize tests for override_existing_globals switch --- tests/cloudpickle_test.py | 79 ++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d318da7e1..51fd939e3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -78,6 +78,7 @@ def setUp(self): class CloudPickleTest(unittest.TestCase): protocol = cloudpickle.DEFAULT_PROTOCOL + override_existing_globals = False def test_itemgetter(self): d = range(10) @@ -1075,7 +1076,9 @@ def f1(): cloned_f0 = {clone_func}(f0, protocol={protocol}) cloned_f1 = {clone_func}(f1, protocol={protocol}) - pickled_f1 = dumps(f1, protocol={protocol}) + pickled_f1 = dumps( + f1, protocol={protocol}, + override_existing_globals={override_existing_globals}) # Change the value of the global variable cloned_f0() @@ -1084,13 +1087,19 @@ def f1(): result_f1 = cloned_f1() assert result_f1 == "changed_by_f0", result_f1 - # Ensure that unpickling the global variable does not change its value + # Ensure that unpickling the global variable overrides (resp. does not + # change) the global variable if override_existing_globals was True + # (resp. False) at pickling time result_pickled_f1 = loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + if {override_existing_globals}: + assert result_pickled_f1 == "default_value", result_pickled_f1 + else: + assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: - code = code_template.format(protocol=self.protocol, - clone_func=clone_func) + code = code_template.format( + protocol=self.protocol, clone_func=clone_func, + override_existing_globals=self.override_existing_globals) assert_run_python_script(textwrap.dedent(code)) def test_closure_interacting_with_a_global_variable(self): @@ -1106,10 +1115,16 @@ def f1(): return _TEST_GLOBAL_VARIABLE cloned_f0 = cloudpickle.loads(cloudpickle.dumps( - f0, protocol=self.protocol)) + f0, protocol=self.protocol, + override_existing_globals=self.override_existing_globals)) + cloned_f1 = cloudpickle.loads(cloudpickle.dumps( - f1, protocol=self.protocol)) - pickled_f1 = cloudpickle.dumps(f1, protocol=self.protocol) + f1, protocol=self.protocol, + override_existing_globals=self.override_existing_globals)) + + pickled_f1 = cloudpickle.dumps( + f1, protocol=self.protocol, + override_existing_globals=self.override_existing_globals) # Change the value of the global variable cloned_f0() @@ -1120,10 +1135,14 @@ def f1(): assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 assert f1() == result_cloned_f1 - # Ensure that unpickling the global variable does not change its - # value + # Ensure that unpickling the global variable overrides (resp. + # does not change) its value if override_existing_globals was True + # (resp False) at pickling time result_pickled_f1 = cloudpickle.loads(pickled_f1)() - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 + if self.override_existing_globals: + assert result_pickled_f1 == "default_value", result_pickled_f1 + else: + assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 finally: _TEST_GLOBAL_VARIABLE = orig_value @@ -1160,7 +1179,9 @@ def func_defined_in_dynamic_module(v=None): # GLOBAL_STATE's value at the time of pickling. with open('function_with_initial_globals.pkl', 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f) + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_existing_globals=self.override_existing_globals) # Change the mod's global variable mod.GLOBAL_STATE = 'changed value' @@ -1169,8 +1190,9 @@ def func_defined_in_dynamic_module(v=None): # returns the updated value. Let's pickle it again. assert mod.func_defined_in_dynamic_module() == 'changed value' with open('function_with_modified_globals.pkl', 'wb') as f: - cloudpickle.dump(mod.func_defined_in_dynamic_module, f, - protocol=self.protocol) + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_existing_globals=self.override_existing_globals) child_process_code = """ import pickle @@ -1189,20 +1211,34 @@ def func_defined_in_dynamic_module(v=None): with open('function_with_modified_globals.pkl','rb') as f: func_with_modified_globals = pickle.load(f) - # assert the this unpickling did not modify the value of - # the local - assert func_with_modified_globals() == 'initial value' + # Verify that loading the function overrides (resp. does not + # change) the global variable GLOBAL_STATE of mod, if + # override_existing_globals was True (resp. False) at pickling + # time + + if {override_existing_globals}: + assert func_with_modified_globals() == 'changed value' + else: + assert func_with_modified_globals() == 'initial value' # Update the value from the child process and check that - # unpickling again does not reset our change. + # unpickling reset (resp. does not reset) our change if + # override_existing_globals was True (resp. False) at pickling + # time assert func_with_initial_globals('new value') == 'new value' assert func_with_modified_globals() == 'new value' with open('function_with_initial_globals.pkl','rb') as f: func_with_initial_globals = pickle.load(f) - assert func_with_initial_globals() == 'new value' - assert func_with_modified_globals() == 'new value' - """ + + if {override_existing_globals}: + assert func_with_initial_globals() == 'initial value' + assert func_with_modified_globals() == 'initial value' + else: + assert func_with_initial_globals() == 'new value' + assert func_with_modified_globals() == 'new value' + + """.format(override_existing_globals=self.override_existing_globals) assert_run_python_script(textwrap.dedent(child_process_code)) finally: @@ -1314,6 +1350,7 @@ def g(x): class Protocol2CloudPickleTest(CloudPickleTest): protocol = 2 + override_existing_globals = True if __name__ == '__main__': From 6ca7e28a264d440d775feb0522a5ead35c8fbed0 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Nov 2018 11:39:19 +0100 Subject: [PATCH 3/8] TST loop over test instead of parametrizing them --- tests/cloudpickle_test.py | 266 +++++++++++++++++++------------------- 1 file changed, 134 insertions(+), 132 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 51fd939e3..18b4d43e8 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -78,7 +78,6 @@ def setUp(self): class CloudPickleTest(unittest.TestCase): protocol = cloudpickle.DEFAULT_PROTOCOL - override_existing_globals = False def test_itemgetter(self): d = range(10) @@ -1097,153 +1096,157 @@ def f1(): assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: - code = code_template.format( - protocol=self.protocol, clone_func=clone_func, - override_existing_globals=self.override_existing_globals) - assert_run_python_script(textwrap.dedent(code)) + for override_existing_globals in [True, False]: + code = code_template.format( + protocol=self.protocol, clone_func=clone_func, + override_existing_globals=override_existing_globals) + assert_run_python_script(textwrap.dedent(code)) def test_closure_interacting_with_a_global_variable(self): global _TEST_GLOBAL_VARIABLE assert _TEST_GLOBAL_VARIABLE == "default_value" orig_value = _TEST_GLOBAL_VARIABLE - try: - def f0(): - global _TEST_GLOBAL_VARIABLE - _TEST_GLOBAL_VARIABLE = "changed_by_f0" - - def f1(): - return _TEST_GLOBAL_VARIABLE - - cloned_f0 = cloudpickle.loads(cloudpickle.dumps( - f0, protocol=self.protocol, - override_existing_globals=self.override_existing_globals)) - - cloned_f1 = cloudpickle.loads(cloudpickle.dumps( - f1, protocol=self.protocol, - override_existing_globals=self.override_existing_globals)) - - pickled_f1 = cloudpickle.dumps( - f1, protocol=self.protocol, - override_existing_globals=self.override_existing_globals) - - # Change the value of the global variable - cloned_f0() - assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" - - # Ensure that the global variable is the same for another function - result_cloned_f1 = cloned_f1() - assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 - assert f1() == result_cloned_f1 - - # Ensure that unpickling the global variable overrides (resp. - # does not change) its value if override_existing_globals was True - # (resp False) at pickling time - result_pickled_f1 = cloudpickle.loads(pickled_f1)() - if self.override_existing_globals: - assert result_pickled_f1 == "default_value", result_pickled_f1 - else: - assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 - finally: - _TEST_GLOBAL_VARIABLE = orig_value + for override_existing_globals in [True, False]: + try: + def f0(): + global _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE = "changed_by_f0" + + def f1(): + return _TEST_GLOBAL_VARIABLE + + cloned_f0 = cloudpickle.loads(cloudpickle.dumps( + f0, protocol=self.protocol, + override_existing_globals=override_existing_globals)) + + cloned_f1 = cloudpickle.loads(cloudpickle.dumps( + f1, protocol=self.protocol, + override_existing_globals=override_existing_globals)) + + pickled_f1 = cloudpickle.dumps( + f1, protocol=self.protocol, + override_existing_globals=override_existing_globals) + + # Change the value of the global variable + cloned_f0() + assert _TEST_GLOBAL_VARIABLE == "changed_by_f0" + + # Ensure that the global variable is the same for another + # function + result_cloned_f1 = cloned_f1() + assert result_cloned_f1 == "changed_by_f0", result_cloned_f1 + assert f1() == result_cloned_f1 + + # Ensure that unpickling the global variable overrides (resp. + # does not change) its value if override_existing_globals was + # True (resp False) at pickling time + res_pickled_f1 = cloudpickle.loads(pickled_f1)() + if override_existing_globals: + assert res_pickled_f1 == "default_value", res_pickled_f1 + else: + assert res_pickled_f1 == "changed_by_f0", res_pickled_f1 + finally: + _TEST_GLOBAL_VARIABLE = orig_value def test_function_from_dynamic_module_with_globals_modifications(self): # This test verifies that the global variable state of a function # defined in a dynamic module in a child process are not reset by # subsequent uplickling. - # first, we create a dynamic module in the parent process - mod = types.ModuleType('mod') - code = ''' - GLOBAL_STATE = "initial value" - - def func_defined_in_dynamic_module(v=None): - global GLOBAL_STATE - if v is not None: - GLOBAL_STATE = v - return GLOBAL_STATE - ''' - exec(textwrap.dedent(code), mod.__dict__) - - try: - # Simple sanity check on the function's output - assert mod.func_defined_in_dynamic_module() == "initial value" - - # The function of mod is pickled two times, with two different - # values for the global variable GLOBAL_STATE. - # Then we launch a child process that sequentially unpickles the - # two functions. Those unpickle functions should share the same - # global variables in the child process: - # Once the first function gets unpickled, mod is created and - # tracked in the child environment. This is state is preserved - # when unpickling the second function whatever the global variable - # GLOBAL_STATE's value at the time of pickling. - - with open('function_with_initial_globals.pkl', 'wb') as f: - cloudpickle.dump( - mod.func_defined_in_dynamic_module, f, - override_existing_globals=self.override_existing_globals) - - # Change the mod's global variable - mod.GLOBAL_STATE = 'changed value' - - # At this point, mod.func_defined_in_dynamic_module() - # returns the updated value. Let's pickle it again. - assert mod.func_defined_in_dynamic_module() == 'changed value' - with open('function_with_modified_globals.pkl', 'wb') as f: - cloudpickle.dump( - mod.func_defined_in_dynamic_module, f, - override_existing_globals=self.override_existing_globals) - - child_process_code = """ - import pickle - - with open('function_with_initial_globals.pkl','rb') as f: - func_with_initial_globals = pickle.load(f) - - # At this point, a module called 'mod' should exist in - # _dynamic_modules_globals. Further function loading - # will use the globals living in mod. - - assert func_with_initial_globals() == 'initial value' - - # Load a function with initial global variable that was - # pickled after a change in the global variable - with open('function_with_modified_globals.pkl','rb') as f: - func_with_modified_globals = pickle.load(f) - - # Verify that loading the function overrides (resp. does not - # change) the global variable GLOBAL_STATE of mod, if - # override_existing_globals was True (resp. False) at pickling - # time + for override_existing_globals in [True, False]: + try: + # first, we create a dynamic module in the parent process + mod = types.ModuleType('mod') + code = ''' + GLOBAL_STATE = "initial value" + + def func_defined_in_dynamic_module(v=None): + global GLOBAL_STATE + if v is not None: + GLOBAL_STATE = v + return GLOBAL_STATE + ''' + exec(textwrap.dedent(code), mod.__dict__) + # Simple sanity check on the function's output + assert mod.func_defined_in_dynamic_module() == "initial value" + + # The function of mod is pickled two times, with two different + # values for the global variable GLOBAL_STATE. Then we launch + # a child process that sequentially unpickles the two + # functions. Those unpickle functions should share the same + # global variables in the child process: Once the first + # function gets unpickled, mod is created and tracked in the + # child environment. This is state is preserved when unpickling + # the second function whatever the global variable + # GLOBAL_STATE's value at the time of pickling. + + with open('function_with_initial_globals.pkl', 'wb') as f: + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_existing_globals=override_existing_globals) + + # Change the mod's global variable + mod.GLOBAL_STATE = 'changed value' + + # At this point, mod.func_defined_in_dynamic_module() + # returns the updated value. Let's pickle it again. + assert mod.func_defined_in_dynamic_module() == 'changed value' + with open('function_with_modified_globals.pkl', 'wb') as f: + cloudpickle.dump( + mod.func_defined_in_dynamic_module, f, + override_existing_globals=override_existing_globals) + + child_process_code = """ + import pickle + + with open('function_with_initial_globals.pkl','rb') as f: + func_with_initial_globals = pickle.load(f) + + # At this point, a module called 'mod' should exist in + # _dynamic_modules_globals. Further function loading + # will use the globals living in mod. - if {override_existing_globals}: - assert func_with_modified_globals() == 'changed value' - else: - assert func_with_modified_globals() == 'initial value' + assert func_with_initial_globals() == 'initial value' - # Update the value from the child process and check that - # unpickling reset (resp. does not reset) our change if - # override_existing_globals was True (resp. False) at pickling - # time - assert func_with_initial_globals('new value') == 'new value' - assert func_with_modified_globals() == 'new value' + # Load a function with initial global variable that was + # pickled after a change in the global variable + with open('function_with_modified_globals.pkl','rb') as f: + func_with_modified_globals = pickle.load(f) + + # Verify that loading the function overrides (resp. does + # not change) the global variable GLOBAL_STATE of mod, if + # override_existing_globals was True (resp. False) at + # pickling time + + if {override_existing_globals}: + assert func_with_modified_globals() == 'changed value' + else: + assert func_with_modified_globals() == 'initial value' + + # Update the value from the child process and check that + # unpickling reset (resp. does not reset) our change if + # override_existing_globals was True (resp. False) at + # pickling time + assert ( + func_with_initial_globals('new value') == 'new value') + assert func_with_modified_globals() == 'new value' - with open('function_with_initial_globals.pkl','rb') as f: - func_with_initial_globals = pickle.load(f) + with open('function_with_initial_globals.pkl','rb') as f: + func_with_initial_globals = pickle.load(f) - if {override_existing_globals}: - assert func_with_initial_globals() == 'initial value' - assert func_with_modified_globals() == 'initial value' - else: - assert func_with_initial_globals() == 'new value' - assert func_with_modified_globals() == 'new value' + if {override_existing_globals}: + assert func_with_initial_globals() == 'initial value' + assert func_with_modified_globals() == 'initial value' + else: + assert func_with_initial_globals() == 'new value' + assert func_with_modified_globals() == 'new value' - """.format(override_existing_globals=self.override_existing_globals) - assert_run_python_script(textwrap.dedent(child_process_code)) + """.format(override_existing_globals=override_existing_globals) + assert_run_python_script(textwrap.dedent(child_process_code)) - finally: - os.unlink('function_with_initial_globals.pkl') - os.unlink('function_with_modified_globals.pkl') + finally: + os.unlink('function_with_initial_globals.pkl') + os.unlink('function_with_modified_globals.pkl') @pytest.mark.skipif(sys.version_info >= (3, 0), reason="hardcoded pickle bytes for 2.7") @@ -1350,7 +1353,6 @@ def g(x): class Protocol2CloudPickleTest(CloudPickleTest): protocol = 2 - override_existing_globals = True if __name__ == '__main__': From f9e589fbaf86bc3c61af12a7a34d4f54b1fd6ac2 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Nov 2018 11:45:40 +0100 Subject: [PATCH 4/8] ENH move override_existing_globals to state --- cloudpickle/cloudpickle.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index b27595bc5..fc32fadc0 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -604,13 +604,13 @@ def save_function_tuple(self, func): 'module': func.__module__, 'name': func.__name__, 'doc': func.__doc__, + 'override_existing_globals': self.override_existing_globals } if hasattr(func, '__annotations__') and sys.version_info >= (3, 7): state['annotations'] = func.__annotations__ if hasattr(func, '__qualname__'): state['qualname'] = func.__qualname__ save(state) - save(self.override_existing_globals) write(pickle.TUPLE) write(pickle.REDUCE) # applies _fill_function on the tuple @@ -1077,14 +1077,9 @@ def _fill_function(*args): The skeleton itself is create by _make_skel_func(). """ - override_existing_globals = True if len(args) == 2: func = args[0] state = args[1] - elif len(args) == 3: - func = args[0] - state = args[1] - override_existing_globals = args[2] elif len(args) == 5: # Backwards compat for cloudpickle v0.4.0, after which the `module` # argument was introduced @@ -1102,7 +1097,7 @@ def _fill_function(*args): # This updates the variables of func's module using func's globals from the # time at which it was pickled. - if override_existing_globals: + if state.get('override_existing_globals', True): func.__globals__.update(state['globals']) else: for k, v in state['globals'].items(): From 679e976095761f48c982554466b8b448d72fee53 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 15 Nov 2018 12:18:18 +0100 Subject: [PATCH 5/8] DOC rephrase comments --- cloudpickle/cloudpickle.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index fc32fadc0..c8b8c03f2 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -276,18 +276,22 @@ def __init__(self, file, protocol=None, override_existing_globals=True): # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} - # When unpickling a function created in a dynamic module or the __main__ - # module, its set of global variables may be different from the one from - # the module the function belongs to, for example if the dynamic module - # has had its variables mutated between pickling and unpickling time - # of the function. - - # - If set to True, the attribue override_existing_globals will - # override the global variables of the dynamic module with the ones - # from the pickled function at the function's unpickling time. - # - If set to False, the global variables from the pickled function - # will be overriden by the dynamic module global variables at the - # function's unpickling time + # Functions belonging to the same module (the __main__ or a dynamic + # module) share the same global variables. Using cloudpickle, they will + # share those global variables in the process they are unpickled in as + # well. However, the state of those global variables may differ in the + # pickled functions, for example if one global variable was mutated + # between two calls to pickle.dump. override_existing_globals is a + # switch that allows two different behaviors: + + # - If override_existing_globals=True, the global variables of the + # module containing this function will be overridden in the process + # which load the pickled object with the values of these variables at + # the pickling time. + # - If override_existing_globals=False, the global variables of the + # module containing this function will not be overridden in the process + # which load the pickled object. If the global variable is not + # declared, it will be initialized with the value at pickling time. self.override_existing_globals = override_existing_globals def dump(self, obj): From 68d91573a95a2caf5767c89c58219c2bfb745763 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 14 Jan 2019 17:43:51 +0100 Subject: [PATCH 6/8] DOC various doc moves and fixes --- cloudpickle/cloudpickle.py | 45 ++++++++++++++++++++++++-------------- tests/cloudpickle_test.py | 11 +++++----- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index c8b8c03f2..5a835d1ed 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -266,6 +266,34 @@ def _walk_global_ops(code): class CloudPickler(Pickler): + """Pickler with extended pickling abilities. + + Parameters + ---------- + file : + + protocol : int + + override_existing_globals : bool + Functions belonging to the same module (the __main__ or a dynamic + module) share the same global variables. Using cloudpickle, they will + share those global variables in the process they are unpickled in as + well. However, the state of those global variables may differ in the + pickled functions, for example if one global variable was mutated + between two calls to pickle.dump. override_existing_globals is a + switch that allows two different behaviors: + + * If override_existing_globals=True, the global variables of the + module containing this function will be overridden in the process + which load the pickled object with the values of these variables at + the pickling time. + * If override_existing_globals=False, the global variables of the + module containing the definition of the nested or dynamically defined + function will not be overridden in the process loading the pickled + object. If the global variable is not declared, it will be + initialized with the value at pickling time. + + """ dispatch = Pickler.dispatch.copy() @@ -276,22 +304,6 @@ def __init__(self, file, protocol=None, override_existing_globals=True): # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} - # Functions belonging to the same module (the __main__ or a dynamic - # module) share the same global variables. Using cloudpickle, they will - # share those global variables in the process they are unpickled in as - # well. However, the state of those global variables may differ in the - # pickled functions, for example if one global variable was mutated - # between two calls to pickle.dump. override_existing_globals is a - # switch that allows two different behaviors: - - # - If override_existing_globals=True, the global variables of the - # module containing this function will be overridden in the process - # which load the pickled object with the values of these variables at - # the pickling time. - # - If override_existing_globals=False, the global variables of the - # module containing this function will not be overridden in the process - # which load the pickled object. If the global variable is not - # declared, it will be initialized with the value at pickling time. self.override_existing_globals = override_existing_globals def dump(self, obj): @@ -1105,6 +1117,7 @@ def _fill_function(*args): func.__globals__.update(state['globals']) else: for k, v in state['globals'].items(): + # Only set global variables that do not exist. if k not in func.__globals__: func.__globals__[k] = v diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 18b4d43e8..152856eef 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1151,7 +1151,7 @@ def f1(): def test_function_from_dynamic_module_with_globals_modifications(self): # This test verifies that the global variable state of a function # defined in a dynamic module in a child process are not reset by - # subsequent uplickling. + # subsequent unpickling. for override_existing_globals in [True, False]: try: @@ -1176,9 +1176,10 @@ def func_defined_in_dynamic_module(v=None): # functions. Those unpickle functions should share the same # global variables in the child process: Once the first # function gets unpickled, mod is created and tracked in the - # child environment. This is state is preserved when unpickling - # the second function whatever the global variable - # GLOBAL_STATE's value at the time of pickling. + # child environment. If override_globals is False, then the + # module state is preserved when unpickling the second function + # whatever the global variable GLOBAL_STATE's value at the time + # of pickling. with open('function_with_initial_globals.pkl', 'wb') as f: cloudpickle.dump( @@ -1224,7 +1225,7 @@ def func_defined_in_dynamic_module(v=None): assert func_with_modified_globals() == 'initial value' # Update the value from the child process and check that - # unpickling reset (resp. does not reset) our change if + # unpickling resets (resp. does not reset) our change if # override_existing_globals was True (resp. False) at # pickling time assert ( From f6d6337df267228d6092db9a5c8c010f1fc929df Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 14 Jan 2019 17:44:45 +0100 Subject: [PATCH 7/8] CLN override_existing_globals -> override_globals --- cloudpickle/cloudpickle.py | 24 ++++++++++++------------ tests/cloudpickle_test.py | 38 +++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 5a835d1ed..0cbd00215 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -274,20 +274,20 @@ class CloudPickler(Pickler): protocol : int - override_existing_globals : bool + override_globals : bool Functions belonging to the same module (the __main__ or a dynamic module) share the same global variables. Using cloudpickle, they will share those global variables in the process they are unpickled in as well. However, the state of those global variables may differ in the pickled functions, for example if one global variable was mutated - between two calls to pickle.dump. override_existing_globals is a + between two calls to pickle.dump. override_globals is a switch that allows two different behaviors: - * If override_existing_globals=True, the global variables of the + * If override_globals=True, the global variables of the module containing this function will be overridden in the process which load the pickled object with the values of these variables at the pickling time. - * If override_existing_globals=False, the global variables of the + * If override_globals=False, the global variables of the module containing the definition of the nested or dynamically defined function will not be overridden in the process loading the pickled object. If the global variable is not declared, it will be @@ -297,14 +297,14 @@ class CloudPickler(Pickler): dispatch = Pickler.dispatch.copy() - def __init__(self, file, protocol=None, override_existing_globals=True): + def __init__(self, file, protocol=None, override_globals=True): if protocol is None: protocol = DEFAULT_PROTOCOL Pickler.__init__(self, file, protocol=protocol) # map ids to dictionary. used to ensure that functions can share global env self.globals_ref = {} - self.override_existing_globals = override_existing_globals + self.override_globals = override_globals def dump(self, obj): self.inject_addons() @@ -620,7 +620,7 @@ def save_function_tuple(self, func): 'module': func.__module__, 'name': func.__name__, 'doc': func.__doc__, - 'override_existing_globals': self.override_existing_globals + 'override_globals': self.override_globals } if hasattr(func, '__annotations__') and sys.version_info >= (3, 7): state['annotations'] = func.__annotations__ @@ -952,7 +952,7 @@ def _rebuild_tornado_coroutine(func): # Shorthands for legacy support -def dump(obj, file, protocol=None, override_existing_globals=True): +def dump(obj, file, protocol=None, override_globals=True): """Serialize obj as bytes streamed into file protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -963,10 +963,10 @@ def dump(obj, file, protocol=None, override_existing_globals=True): compatibility with older versions of Python. """ CloudPickler(file, protocol=protocol, - override_existing_globals=override_existing_globals).dump(obj) + override_globals=override_globals).dump(obj) -def dumps(obj, protocol=None, override_existing_globals=True): +def dumps(obj, protocol=None, override_globals=True): """Serialize obj as a string of bytes allocated in memory protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to @@ -979,7 +979,7 @@ def dumps(obj, protocol=None, override_existing_globals=True): file = StringIO() try: cp = CloudPickler(file, protocol=protocol, - override_existing_globals=override_existing_globals) + override_globals=override_globals) cp.dump(obj) return file.getvalue() finally: @@ -1113,7 +1113,7 @@ def _fill_function(*args): # This updates the variables of func's module using func's globals from the # time at which it was pickled. - if state.get('override_existing_globals', True): + if state.get('override_globals', True): func.__globals__.update(state['globals']) else: for k, v in state['globals'].items(): diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 152856eef..d88662b2f 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1077,7 +1077,7 @@ def f1(): cloned_f1 = {clone_func}(f1, protocol={protocol}) pickled_f1 = dumps( f1, protocol={protocol}, - override_existing_globals={override_existing_globals}) + override_globals={override_globals}) # Change the value of the global variable cloned_f0() @@ -1087,26 +1087,26 @@ def f1(): assert result_f1 == "changed_by_f0", result_f1 # Ensure that unpickling the global variable overrides (resp. does not - # change) the global variable if override_existing_globals was True + # change) the global variable if override_globals was True # (resp. False) at pickling time result_pickled_f1 = loads(pickled_f1)() - if {override_existing_globals}: + if {override_globals}: assert result_pickled_f1 == "default_value", result_pickled_f1 else: assert result_pickled_f1 == "changed_by_f0", result_pickled_f1 """ for clone_func in ['local_clone', 'subprocess_pickle_echo']: - for override_existing_globals in [True, False]: + for override_globals in [True, False]: code = code_template.format( protocol=self.protocol, clone_func=clone_func, - override_existing_globals=override_existing_globals) + override_globals=override_globals) assert_run_python_script(textwrap.dedent(code)) def test_closure_interacting_with_a_global_variable(self): global _TEST_GLOBAL_VARIABLE assert _TEST_GLOBAL_VARIABLE == "default_value" orig_value = _TEST_GLOBAL_VARIABLE - for override_existing_globals in [True, False]: + for override_globals in [True, False]: try: def f0(): global _TEST_GLOBAL_VARIABLE @@ -1117,15 +1117,15 @@ def f1(): cloned_f0 = cloudpickle.loads(cloudpickle.dumps( f0, protocol=self.protocol, - override_existing_globals=override_existing_globals)) + override_globals=override_globals)) cloned_f1 = cloudpickle.loads(cloudpickle.dumps( f1, protocol=self.protocol, - override_existing_globals=override_existing_globals)) + override_globals=override_globals)) pickled_f1 = cloudpickle.dumps( f1, protocol=self.protocol, - override_existing_globals=override_existing_globals) + override_globals=override_globals) # Change the value of the global variable cloned_f0() @@ -1138,10 +1138,10 @@ def f1(): assert f1() == result_cloned_f1 # Ensure that unpickling the global variable overrides (resp. - # does not change) its value if override_existing_globals was + # does not change) its value if override_globals was # True (resp False) at pickling time res_pickled_f1 = cloudpickle.loads(pickled_f1)() - if override_existing_globals: + if override_globals: assert res_pickled_f1 == "default_value", res_pickled_f1 else: assert res_pickled_f1 == "changed_by_f0", res_pickled_f1 @@ -1153,7 +1153,7 @@ def test_function_from_dynamic_module_with_globals_modifications(self): # defined in a dynamic module in a child process are not reset by # subsequent unpickling. - for override_existing_globals in [True, False]: + for override_globals in [True, False]: try: # first, we create a dynamic module in the parent process mod = types.ModuleType('mod') @@ -1184,7 +1184,7 @@ def func_defined_in_dynamic_module(v=None): with open('function_with_initial_globals.pkl', 'wb') as f: cloudpickle.dump( mod.func_defined_in_dynamic_module, f, - override_existing_globals=override_existing_globals) + override_globals=override_globals) # Change the mod's global variable mod.GLOBAL_STATE = 'changed value' @@ -1195,7 +1195,7 @@ def func_defined_in_dynamic_module(v=None): with open('function_with_modified_globals.pkl', 'wb') as f: cloudpickle.dump( mod.func_defined_in_dynamic_module, f, - override_existing_globals=override_existing_globals) + override_globals=override_globals) child_process_code = """ import pickle @@ -1216,17 +1216,17 @@ def func_defined_in_dynamic_module(v=None): # Verify that loading the function overrides (resp. does # not change) the global variable GLOBAL_STATE of mod, if - # override_existing_globals was True (resp. False) at + # override_globals was True (resp. False) at # pickling time - if {override_existing_globals}: + if {override_globals}: assert func_with_modified_globals() == 'changed value' else: assert func_with_modified_globals() == 'initial value' # Update the value from the child process and check that # unpickling resets (resp. does not reset) our change if - # override_existing_globals was True (resp. False) at + # override_globals was True (resp. False) at # pickling time assert ( func_with_initial_globals('new value') == 'new value') @@ -1235,14 +1235,14 @@ def func_defined_in_dynamic_module(v=None): with open('function_with_initial_globals.pkl','rb') as f: func_with_initial_globals = pickle.load(f) - if {override_existing_globals}: + if {override_globals}: assert func_with_initial_globals() == 'initial value' assert func_with_modified_globals() == 'initial value' else: assert func_with_initial_globals() == 'new value' assert func_with_modified_globals() == 'new value' - """.format(override_existing_globals=override_existing_globals) + """.format(override_globals=override_globals) assert_run_python_script(textwrap.dedent(child_process_code)) finally: From 3f32518812162269a3e32c50e2c13a640d2b5493 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 15 Jan 2019 16:59:57 +0100 Subject: [PATCH 8/8] DOC update CHANGES.md --- CHANGES.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b7499530f..4c5a1e3c0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +0.7.0 +===== + +- Add a switch in `cloudpickle.dump` speficying if variables present in the + written pickle string should override colluding values present in their new + namespace at unpickling time. + ([issue #214](https://github.com/cloudpipe/cloudpickle/issues/214)) + + 0.6.1 =====