From 05b95a9a5c6503cd7f18e9109921775cef6e7e45 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 20 May 2019 19:14:30 +0200 Subject: [PATCH 01/18] MNT use the is_dynamic for both classes and functions --- cloudpickle/cloudpickle.py | 147 +++++++++++++++++++------------------ tests/cloudpickle_test.py | 2 +- 2 files changed, 76 insertions(+), 73 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 1c91021e9..67d18501b 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -123,6 +123,71 @@ def _getattribute(obj, name): return getattr(obj, name, None), None +def whichmodule(obj, name): + """Find the module an object belongs to. + + This function differs from ``pickle.whichmodule`` in two ways: + - it does not mangle the cases where obj's module is __main__ and obj was + not found in any module. + - Errors arising during module introspection are ignored, as those errors + are considered unwanted side effects. + """ + module_name = getattr(obj, '__module__', None) + if module_name is not None: + return module_name + # Protect the iteration by using a list copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr. + for module_name, module in list(sys.modules.items()): + if module_name == '__main__' or module is None: + continue + try: + if _getattribute(module, name)[0] is obj: + return module_name + except Exception: + pass + return None + + +def _is_global(obj, name=None): + """Determine if obj can be pickled as attribute of a file-backed module""" + if name is None: + name = getattr(obj, '__qualname__', None) + if name is None: + name = getattr(obj, '__name__', None) + + module_name = whichmodule(obj, name) + + if module_name is None: + # In this case, obj.__module__ is None AND obj was not found in any + # imported module. obj is thus treated as dynamic. + return False + + if module_name == "__main__": + return False + + module = sys.modules.get(module_name, None) + if module is None: + # The main reason why obj's module would not be imported is that this + # module has been dynamically created, using for example + # types.ModuleType. The other possibility is that module was removed + # from sys.modules after obj was created/imported. But this case is not + # supported, as the standard pickle does not support it either. + return False + + if _is_dynamic(module): + # module has been added to sys.modules, but it can still be dynamic. + return False + + try: + obj2, parent = _getattribute(module, name) + except AttributeError: + # obj was not found inside the module it points to + return False + if obj2 is not obj: + return False + return True + + def _make_cell_set_template_code(): """Get the Python compiler to emit LOAD_FAST(arg); STORE_DEREF @@ -392,61 +457,9 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - write = self.write - - if name is None: - name = getattr(obj, '__qualname__', None) - if name is None: - name = getattr(obj, '__name__', None) - try: - # whichmodule() could fail, see - # https://bitbucket.org/gutworth/six/issues/63/importing-six-breaks-pickling - modname = pickle.whichmodule(obj, name) - except Exception: - modname = None - # print('which gives %s %s %s' % (modname, obj, name)) - try: - themodule = sys.modules[modname] - except KeyError: - # eval'd items such as namedtuple give invalid items for their function __module__ - modname = '__main__' - - if modname == '__main__': - themodule = None - - try: - lookedup_by_name, _ = _getattribute(themodule, name) - except Exception: - lookedup_by_name = None - - if themodule: - if lookedup_by_name is obj: - return self.save_global(obj, name) - - # if func is lambda, def'ed at prompt, is in main, or is nested, then - # we'll pickle the actual function object rather than simply saving a - # reference (as is done in default pickler), via save_function_tuple. - if (islambda(obj) - or getattr(obj.__code__, 'co_filename', None) == '' - or themodule is None): - self.save_function_tuple(obj) - return - else: - # func is nested - if lookedup_by_name is None or lookedup_by_name is not obj: - self.save_function_tuple(obj) - return - - if obj.__dict__: - # essentially save_reduce, but workaround needed to avoid recursion - self.save(_restore_attr) - write(pickle.MARK + pickle.GLOBAL + modname + '\n' + name + '\n') - self.memoize(obj) - self.save(obj.__dict__) - write(pickle.TUPLE + pickle.REDUCE) - else: - write(pickle.GLOBAL + modname + '\n' + name + '\n') - self.memoize(obj) + if not _is_global(obj, name=name): + return self.save_function_tuple(obj) + return Pickler.save_global(self, obj, name=name) dispatch[types.FunctionType] = save_function @@ -801,23 +814,13 @@ def save_global(self, obj, name=None, pack=struct.pack): return self.save_reduce(type, (Ellipsis,), obj=obj) elif obj is type(NotImplemented): return self.save_reduce(type, (NotImplemented,), obj=obj) - - if obj.__module__ == "__main__": - return self.save_dynamic_class(obj) - - try: - return Pickler.save_global(self, obj, name=name) - except Exception: - if obj.__module__ == "__builtin__" or obj.__module__ == "builtins": - if obj in _BUILTIN_TYPE_NAMES: - return self.save_reduce( - _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) - - typ = type(obj) - if typ is not obj and isinstance(obj, (type, types.ClassType)): - return self.save_dynamic_class(obj) - - raise + elif obj in _BUILTIN_TYPE_NAMES: + return self.save_reduce( + _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + elif not _is_global(obj, name=name): + self.save_dynamic_class(obj) + else: + Pickler.save_global(self, obj, name=name) dispatch[type] = save_global dispatch[types.ClassType] = save_global diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 7f7d7dfd8..f745e6f77 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1023,7 +1023,7 @@ def __init__(self, x): self.assertEqual(set(weakset), {depickled1, depickled2}) def test_faulty_module(self): - for module_name in ['_faulty_module', '_missing_module', None]: + for module_name in ['_missing_module', None]: class FaultyModule(object): def __getattr__(self, name): # This throws an exception while looking up within From d1b96c5fdad0040333502a8cbd6e7ea81baa3536 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 May 2019 15:36:31 +0200 Subject: [PATCH 02/18] FIX fix is_dynamic for some builtin packages in pypy Previously, this bug was not revealed because we tried Pickle.save_global(obj) (which would succeed in these cases) before calling is_dynamic(obj.__module__), but now the order is reversed --- cloudpickle/cloudpickle.py | 25 ++++++++++++++++++++++++- tests/cloudpickle_test.py | 4 ++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 67d18501b..54cda5395 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -93,6 +93,11 @@ PY3 = True PY2 = False + if sys.implementation.name == 'pypy': + from importlib._bootstrap import _find_spec + else: + from _frozen_importlib import _find_spec + def _ensure_tracking(class_def): with _DYNAMIC_CLASS_TRACKER_LOCK: @@ -1301,7 +1306,25 @@ def _is_dynamic(module): return False if hasattr(module, '__spec__'): - return module.__spec__ is None + if module.__spec__ is not None: + return False + + # In PyPy, Some built-in modules such as _codecs can have their + # __spec__ attribute set to None despite being imported. For such + # modules, the ``_find_spec`` utility of the standard library is used. + parent_name = module.__name__.rpartition('.')[0] + if parent_name: + try: + parent = sys.modules[parent_name] + except KeyError: + msg = "parent {!r} not in sys.modules" + raise ImportError(msg.format(parent_name)) + else: + pkgpath = parent.__path__ + else: + pkgpath = None + return _find_spec(module.__name__, pkgpath, module) is None + else: # Backward compat for Python 2 import imp diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index f745e6f77..0bb3fe2a3 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -621,6 +621,10 @@ def test_is_dynamic_module(self): dynamic_module = types.ModuleType('dynamic_module') assert _is_dynamic(dynamic_module) + if sys.implementation.name == 'pypy': + import _codecs + assert not _is_dynamic(_codecs) + def test_Ellipsis(self): self.assertEqual(Ellipsis, pickle_depickle(Ellipsis, protocol=self.protocol)) From 2f24f8c92c799c56642ae9942ae9463bdb94b833 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 May 2019 15:51:41 +0200 Subject: [PATCH 03/18] FIX python2-3 compat --- cloudpickle/cloudpickle.py | 3 ++- tests/cloudpickle_test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 54cda5395..a2a4c20f0 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -50,6 +50,7 @@ import opcode import operator import pickle +import platform import struct import sys import traceback @@ -93,7 +94,7 @@ PY3 = True PY2 = False - if sys.implementation.name == 'pypy': + if platform.python_implementation() == 'PyPy': from importlib._bootstrap import _find_spec else: from _frozen_importlib import _find_spec diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0bb3fe2a3..c8190a682 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -621,7 +621,7 @@ def test_is_dynamic_module(self): dynamic_module = types.ModuleType('dynamic_module') assert _is_dynamic(dynamic_module) - if sys.implementation.name == 'pypy': + if platform.python_implementation() == 'PyPy': import _codecs assert not _is_dynamic(_codecs) From 9278bce39bdc13437ad9442a9d26b01cb5f5d621 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 22 May 2019 16:03:49 +0200 Subject: [PATCH 04/18] CLN stale code --- cloudpickle/cloudpickle.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index a2a4c20f0..d8cbdaa35 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -307,10 +307,6 @@ def cell_set(cell, value): EXTENDED_ARG = dis.EXTENDED_ARG -def islambda(func): - return getattr(func, '__name__') == '' - - _BUILTIN_TYPE_NAMES = {} for k, v in types.__dict__.items(): if type(v) is type: @@ -1094,13 +1090,6 @@ def dynamic_subimport(name, vars): return mod -# restores function attributes -def _restore_attr(obj, attr): - for key, val in attr.items(): - setattr(obj, key, val) - return obj - - def _gen_ellipsis(): return Ellipsis From df99f748cb158dc09d09cf41386c9b379ac5a588 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 23 May 2019 13:34:53 +0200 Subject: [PATCH 05/18] [ci downstream] From 7d105cc4ab08b74a08ea84326beae63cb708282a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Thu, 23 May 2019 14:00:16 +0200 Subject: [PATCH 06/18] CLN address review comments --- cloudpickle/cloudpickle.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index d8cbdaa35..57069d408 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -93,11 +93,7 @@ string_types = (str,) PY3 = True PY2 = False - - if platform.python_implementation() == 'PyPy': - from importlib._bootstrap import _find_spec - else: - from _frozen_importlib import _find_spec + from importlib._bootstrap import _find_spec def _ensure_tracking(class_def): @@ -180,8 +176,8 @@ def _is_global(obj, name=None): # supported, as the standard pickle does not support it either. return False + # module has been added to sys.modules, but it can still be dynamic. if _is_dynamic(module): - # module has been added to sys.modules, but it can still be dynamic. return False try: @@ -189,9 +185,7 @@ def _is_global(obj, name=None): except AttributeError: # obj was not found inside the module it points to return False - if obj2 is not obj: - return False - return True + return obj2 is obj def _make_cell_set_template_code(): From 2c9bf4b0338534b0247983ed5a07cfba38c3663d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Fri, 24 May 2019 15:55:06 +0200 Subject: [PATCH 07/18] add case where name is not None --- cloudpickle/cloudpickle.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 57069d408..652b6be22 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -813,6 +813,8 @@ def save_global(self, obj, name=None, pack=struct.pack): elif obj in _BUILTIN_TYPE_NAMES: return self.save_reduce( _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) + elif name is not None: + Pickler.save_global(self, obj, name=name) elif not _is_global(obj, name=name): self.save_dynamic_class(obj) else: From b44c26b195683214db759a119e3b3e8a52db61af Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 13:32:43 +0200 Subject: [PATCH 08/18] TST test when __reduce__ returns a string --- tests/cloudpickle_test.py | 9 +++++++++ tests/mypkg/__init__.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index c8190a682..8ef22770c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1798,6 +1798,15 @@ def f(a, /, b=1): """.format(protocol=self.protocol) assert_run_python_script(textwrap.dedent(code)) + def test___reduce___returns_string(self): + # Non regression test for objects with a __reduce__ method returning a + # string, meaning "save by attribute using save_global" + from .mypkg import some_singleton + assert some_singleton.__reduce__() == "some_singleton" + depickled_singleton = pickle_depickle( + some_singleton, protocol=self.protocol) + assert depickled_singleton is some_singleton + class Protocol2CloudPickleTest(CloudPickleTest): protocol = 2 diff --git a/tests/mypkg/__init__.py b/tests/mypkg/__init__.py index fe3cc6b1d..60d5b8d28 100644 --- a/tests/mypkg/__init__.py +++ b/tests/mypkg/__init__.py @@ -4,3 +4,12 @@ def package_function(): """Function living inside a package, not a simple module""" return "hello from a package!" + + +class _SingletonClass(object): + def __reduce__(self): + # This reducer is only valid for the top level "some_singleton" object. + return "some_singleton" + + +some_singleton = _SingletonClass() From 35852d6a00ec1bd51d6b24026b54e869631f34c5 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 13:52:18 +0200 Subject: [PATCH 09/18] TST rountrip function from a dynamic module --- tests/cloudpickle_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 8ef22770c..cf7cb8a08 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -478,6 +478,10 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) + # Test pickling a function of mod + depickled_f = pickle_depickle(mod.f, protocol=self.protocol) + self.assertEqual(mod.f(5), depickled_f(5)) + def test_module_locals_behavior(self): # Makes sure that a local function defined in another module is # correctly serialized. This notably checks that the globals are From c18f8bfbf3dd210e194deda48296143acd2a8c0d Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 14:07:46 +0200 Subject: [PATCH 10/18] TST dynamic module added to sys.modules --- tests/cloudpickle_test.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index cf7cb8a08..77fde239b 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -478,9 +478,14 @@ def method(self, x): mod1, mod2 = pickle_depickle([mod, mod]) self.assertEqual(id(mod1), id(mod2)) - # Test pickling a function of mod - depickled_f = pickle_depickle(mod.f, protocol=self.protocol) - self.assertEqual(mod.f(5), depickled_f(5)) + # Ensure proper pickling of mod's functions when module "looks" like a + # file-backed module even though it is not: + try: + sys.modules['mod'] = mod + depickled_f = pickle_depickle(mod.f, protocol=self.protocol) + self.assertEqual(mod.f(5), depickled_f(5)) + finally: + sys.modules.pop('mod', None) def test_module_locals_behavior(self): # Makes sure that a local function defined in another module is From 97b7f29e2aba182d6de443664524c65d22787fdd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 14:21:48 +0200 Subject: [PATCH 11/18] CI trigger From 20ed3dbfc5cd8dd1a7dd78f76c736ee49f1617c1 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 16:03:23 +0200 Subject: [PATCH 12/18] flag untested code in _is_dynamic --- cloudpickle/cloudpickle.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 652b6be22..0086c76ad 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1299,7 +1299,11 @@ def _is_dynamic(module): # __spec__ attribute set to None despite being imported. For such # modules, the ``_find_spec`` utility of the standard library is used. parent_name = module.__name__.rpartition('.')[0] - if parent_name: + if parent_name: # pragma: no branch + # This code handles the case where an imported package (and not + # module) remains with __spec__ set to None. It is however untested + # as no package in the PyPy stdlib has __spec__ set to None after + # it is imported. try: parent = sys.modules[parent_name] except KeyError: From 918fda933a63887d46afad38948bd85cf9d6c2e8 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 16:41:42 +0200 Subject: [PATCH 13/18] FIX correct coverage pragma --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 0086c76ad..5ba0af91d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -1299,7 +1299,7 @@ def _is_dynamic(module): # __spec__ attribute set to None despite being imported. For such # modules, the ``_find_spec`` utility of the standard library is used. parent_name = module.__name__.rpartition('.')[0] - if parent_name: # pragma: no branch + if parent_name: # pragma: no cover # This code handles the case where an imported package (and not # module) remains with __spec__ set to None. It is however untested # as no package in the PyPy stdlib has __spec__ set to None after From dfff9edc79012603e4b8fb9118d9800ea317b949 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 16:46:31 +0200 Subject: [PATCH 14/18] [ci downstream] From 2ba23475db78dbd38ecfb0892f1916eeefe11279 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 16:51:30 +0200 Subject: [PATCH 15/18] MNT make whichmodule private --- cloudpickle/cloudpickle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 5ba0af91d..31c91903d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -125,10 +125,10 @@ def _getattribute(obj, name): return getattr(obj, name, None), None -def whichmodule(obj, name): +def _whichmodule(obj, name): """Find the module an object belongs to. - This function differs from ``pickle.whichmodule`` in two ways: + This function differs from ``pickle._whichmodule`` in two ways: - it does not mangle the cases where obj's module is __main__ and obj was not found in any module. - Errors arising during module introspection are ignored, as those errors @@ -157,7 +157,7 @@ def _is_global(obj, name=None): if name is None: name = getattr(obj, '__name__', None) - module_name = whichmodule(obj, name) + module_name = _whichmodule(obj, name) if module_name is None: # In this case, obj.__module__ is None AND obj was not found in any From ac110939f388af1fe3235f9eb53de13fa02f8e17 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Wed, 29 May 2019 17:22:42 +0200 Subject: [PATCH 16/18] [ci downstream] From a0e310adafca3bd0790c3d4baaeb76157ebfc7b4 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 4 Jun 2019 17:50:04 +0200 Subject: [PATCH 17/18] [ci downstream] From ff61f6c8e78c39902a57347ed973247de0ae48a2 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 4 Jun 2019 18:10:07 +0200 Subject: [PATCH 18/18] DOC fix comment Co-Authored-By: Olivier Grisel --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 31c91903d..b3893175c 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -128,7 +128,7 @@ def _getattribute(obj, name): def _whichmodule(obj, name): """Find the module an object belongs to. - This function differs from ``pickle._whichmodule`` in two ways: + This function differs from ``pickle.whichmodule`` in two ways: - it does not mangle the cases where obj's module is __main__ and obj was not found in any module. - Errors arising during module introspection are ignored, as those errors