From 59d930ea36e552d2c9ff29146abe5d4468e82753 Mon Sep 17 00:00:00 2001 From: valtron Date: Tue, 10 Mar 2020 19:27:35 -0600 Subject: [PATCH 1/7] Pickle dynamic `typing.TypeVar`s --- CHANGES.md | 3 ++ cloudpickle/cloudpickle.py | 63 +++++++++++++++++++++++++++------ cloudpickle/cloudpickle_fast.py | 10 +++--- tests/cloudpickle_test.py | 34 ++++++++++++++++++ tests/mypkg/__init__.py | 2 ++ 5 files changed, 98 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index fdec87447..df78278e5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,9 @@ - Stop pickling the annotations of a dynamic class for Python < 3.6 (follow up on #276) ([issue #347](https://github.com/cloudpipe/cloudpickle/issues/347)) +- Fix a bug affecting the pickling of dynamic `TypeVar` instances on Python 3.7+, + and expand the support for pickling `TypeVar` instances (dynamic or non-dynamic) + to Python 3.5-3.6 ([PR #350](https://github.com/cloudpipe/cloudpickle/pull/350)) 1.3.0 ===== diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 68bd20df8..6a2ea3392 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -60,6 +60,7 @@ import weakref import uuid import threading +import typing from enum import Enum from pickle import _Pickler as Pickler @@ -118,7 +119,7 @@ def _whichmodule(obj, name): - Errors arising during module introspection are ignored, as those errors are considered unwanted side effects. """ - module_name = getattr(obj, '__module__', None) + module_name = _get_module_attr(obj) if module_name is not None: return module_name # Protect the iteration by using a copy of sys.modules against dynamic @@ -141,8 +142,22 @@ def _whichmodule(obj, name): return None -def _is_global(obj, name=None): +if sys.version_info[:2] < (3, 7): + def _get_module_attr(obj): + if isinstance(obj, typing.TypeVar): + return None + return getattr(obj, '__module__', None) +else: + def _get_module_attr(obj): + return getattr(obj, '__module__', None) + + +def _is_importable_by_name(obj, name=None): """Determine if obj can be pickled as attribute of a file-backed module""" + return _lookup_module_and_qualname(obj, name) is not None + + +def _lookup_module_and_qualname(obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) if name is None: @@ -153,10 +168,10 @@ def _is_global(obj, name=None): if module_name is None: # In this case, obj.__module__ is None AND obj was not found in any # imported module. obj is thus treated as dynamic. - return False + return None if module_name == "__main__": - return False + return None module = sys.modules.get(module_name, None) if module is None: @@ -165,18 +180,20 @@ def _is_global(obj, name=None): # types.ModuleType. The other possibility is that module was removed # from sys.modules after obj was created/imported. But this case is not # supported, as the standard pickle does not support it either. - return False + return None # module has been added to sys.modules, but it can still be dynamic. if _is_dynamic(module): - return False + return None try: obj2, parent = _getattribute(module, name) except AttributeError: # obj was not found inside the module it points to - return False - return obj2 is obj + return None + if obj2 is not obj: + return None + return module, name def _extract_code_globals(co): @@ -420,6 +437,11 @@ def dump(self, obj): else: raise + def save_typevar(self, obj): + self.save_reduce(*_typevar_reduce(obj)) + + dispatch[typing.TypeVar] = save_typevar + def save_memoryview(self, obj): self.save(obj.tobytes()) @@ -469,7 +491,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_global(obj, name=name): + if _is_importable_by_name(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) @@ -772,7 +794,7 @@ def save_global(self, obj, name=None, pack=struct.pack): _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_global(obj, name=name): + elif not _is_importable_by_name(obj, name=name): self.save_dynamic_class(obj) else: Pickler.save_global(self, obj, name=name) @@ -1216,3 +1238,24 @@ def _is_dynamic(module): else: pkgpath = None return _find_spec(module.__name__, pkgpath, module) is None + + +def _make_typevar(name, bound, constraints, covariant, contravariant): + return typing.TypeVar( + name, *constraints, bound=bound, + covariant=covariant, contravariant=contravariant + ) + + +def _decompose_typevar(obj): + return ( + obj.__name__, obj.__bound__, obj.__constraints__, + obj.__covariant__, obj.__contravariant__, + ) + + +def _typevar_reduce(obj): + module_and_name = _lookup_module_and_qualname(obj, obj.__name__) + if module_and_name is None: + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 1ea235ca3..47e70de94 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -20,14 +20,15 @@ import sys import types import weakref +import typing from _pickle import Pickler from .cloudpickle import ( _is_dynamic, _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_global, _builtin_type, + _find_imported_submodules, _get_cell_contents, _is_importable_by_name, _builtin_type, Enum, _ensure_tracking, _make_skeleton_class, _make_skeleton_enum, - _extract_class_dict, dynamic_subimport, subimport + _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, ) load, loads = _pickle.load, _pickle.loads @@ -332,7 +333,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_global(obj): + elif not _is_importable_by_name(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -422,6 +423,7 @@ class CloudPickler(Pickler): dispatch[types.MethodType] = _method_reduce dispatch[types.MappingProxyType] = _mappingproxy_reduce dispatch[weakref.WeakSet] = _weakset_reduce + dispatch[typing.TypeVar] = _typevar_reduce def __init__(self, file, protocol=None, buffer_callback=None): if protocol is None: @@ -503,7 +505,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. """ - if _is_global(obj): + if _is_importable_by_name(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d64d2001d..5489cbd0c 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -46,6 +46,7 @@ from cloudpickle.cloudpickle import _is_dynamic from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule +from cloudpickle.cloudpickle import _lookup_module_and_qualname from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script @@ -2110,11 +2111,44 @@ class LocallyDefinedClass: reconstructed = pickle.loads(pickle_bytes, buffers=buffers) np.testing.assert_allclose(reconstructed.data, data_instance.data) + def test_pickle_dynamic_typevar(self): + T = typing.TypeVar('T') + pickle_depickle(T, protocol=self.protocol) + + def test_pickle_importable_typevar(self): + from .mypkg import T + T1 = pickle_depickle(T, protocol=self.protocol) + assert T1 is T + class Protocol2CloudPickleTest(CloudPickleTest): protocol = 2 +def test_lookup_module_and_qualname_dynamic_typevar(): + T = typing.TypeVar('T') + module_and_name = _lookup_module_and_qualname(T, T.__name__) + assert module_and_name is None + + +def test_lookup_module_and_qualname_importable_typevar(): + from . import mypkg + T = mypkg.T + module_and_name = _lookup_module_and_qualname(T, T.__name__) + assert module_and_name is not None + module, name = module_and_name + assert module is mypkg + assert name == 'T' + + +def test_lookup_module_and_qualname_stdlib_typevar(): + module_and_name = _lookup_module_and_qualname(typing.AnyStr, typing.AnyStr.__name__) + assert module_and_name is not None + module, name = module_and_name + assert module is typing + assert name == 'AnyStr' + + if __name__ == '__main__': unittest.main() diff --git a/tests/mypkg/__init__.py b/tests/mypkg/__init__.py index 60d5b8d28..5ef5ab486 100644 --- a/tests/mypkg/__init__.py +++ b/tests/mypkg/__init__.py @@ -1,3 +1,4 @@ +import typing from .mod import module_function @@ -13,3 +14,4 @@ def __reduce__(self): some_singleton = _SingletonClass() +T = typing.TypeVar('T') From d3fa5ea7b52e5979aaa31eec660acbce92d6e1e7 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 14 Mar 2020 12:51:31 +0100 Subject: [PATCH 2/7] Always pass name as kwarg + insert comments --- cloudpickle/cloudpickle.py | 16 +++++++++++++--- tests/cloudpickle_test.py | 7 ++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 6a2ea3392..8fcfc3d83 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -143,6 +143,12 @@ def _whichmodule(obj, name): if sys.version_info[:2] < (3, 7): + # Workaround bug in old Python versions: prior to Python 3.7, T.__module__ + # would always be set to "typing" even when the TypeVar T would be defined + # in a different module. + # + # For such older Python versions, we treat all TypeVar instances as + # non-importable TypeVar instances to avoid this issue. def _get_module_attr(obj): if isinstance(obj, typing.TypeVar): return None @@ -154,13 +160,16 @@ def _get_module_attr(obj): def _is_importable_by_name(obj, name=None): """Determine if obj can be pickled as attribute of a file-backed module""" - return _lookup_module_and_qualname(obj, name) is not None + return _lookup_module_and_qualname(obj, name=name) is not None def _lookup_module_and_qualname(obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) - if name is None: + if name is None: # pragma: no cover + # This used to be needed for Python 2.7 support but is probably not + # needed anymore. However we keep the __name__ introspection in case + # users of cloudpickle rely on this old behavior for unknown reasons. name = getattr(obj, '__name__', None) module_name = _whichmodule(obj, name) @@ -1255,7 +1264,8 @@ def _decompose_typevar(obj): def _typevar_reduce(obj): - module_and_name = _lookup_module_and_qualname(obj, obj.__name__) + # TypeVar instances have no __qualname__ hence we pass the name explicitly. + module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) return (getattr, module_and_name) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 5489cbd0c..4aa34e4bd 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2128,14 +2128,14 @@ class Protocol2CloudPickleTest(CloudPickleTest): def test_lookup_module_and_qualname_dynamic_typevar(): T = typing.TypeVar('T') - module_and_name = _lookup_module_and_qualname(T, T.__name__) + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is None def test_lookup_module_and_qualname_importable_typevar(): from . import mypkg T = mypkg.T - module_and_name = _lookup_module_and_qualname(T, T.__name__) + module_and_name = _lookup_module_and_qualname(T, name=T.__name__) assert module_and_name is not None module, name = module_and_name assert module is mypkg @@ -2143,7 +2143,8 @@ def test_lookup_module_and_qualname_importable_typevar(): def test_lookup_module_and_qualname_stdlib_typevar(): - module_and_name = _lookup_module_and_qualname(typing.AnyStr, typing.AnyStr.__name__) + module_and_name = _lookup_module_and_qualname(typing.AnyStr, + name=typing.AnyStr.__name__) assert module_and_name is not None module, name = module_and_name assert module is typing From 4e356a8b85cd31061540b49d113bdecd802bebec Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 14 Mar 2020 12:51:39 +0100 Subject: [PATCH 3/7] Cosmit --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index df78278e5..c4efa23dd 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ - Stop pickling the annotations of a dynamic class for Python < 3.6 (follow up on #276) ([issue #347](https://github.com/cloudpipe/cloudpickle/issues/347)) + - Fix a bug affecting the pickling of dynamic `TypeVar` instances on Python 3.7+, and expand the support for pickling `TypeVar` instances (dynamic or non-dynamic) to Python 3.5-3.6 ([PR #350](https://github.com/cloudpipe/cloudpickle/pull/350)) From 7ac7a66193f2973d0de03bdf3065d7a70ec0fd7f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Sat, 14 Mar 2020 13:50:08 +0100 Subject: [PATCH 4/7] Fix _get_module_attr comment --- cloudpickle/cloudpickle.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 8fcfc3d83..5f0812dd4 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -147,8 +147,9 @@ def _whichmodule(obj, name): # would always be set to "typing" even when the TypeVar T would be defined # in a different module. # - # For such older Python versions, we treat all TypeVar instances as - # non-importable TypeVar instances to avoid this issue. + # For such older Python versions, we ignore the __module__ attribute of + # TypeVar instances and instead exhaustively lookup those instances in all + # currently imported modules via the _whichmodule function. def _get_module_attr(obj): if isinstance(obj, typing.TypeVar): return None From fcfb2d55e1368ff8ddee0d081ced6e5cac6b570a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 14 Mar 2020 13:53:43 +0100 Subject: [PATCH 5/7] test for attribute equality of roundtripped typevars --- tests/cloudpickle_test.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4aa34e4bd..aa64f5740 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -2113,13 +2113,23 @@ class LocallyDefinedClass: def test_pickle_dynamic_typevar(self): T = typing.TypeVar('T') - pickle_depickle(T, protocol=self.protocol) + depickled_T = pickle_depickle(T, protocol=self.protocol) + attr_list = [ + "__name__", "__bound__", "__constraints__", "__covariant__", + "__contravariant__" + ] + for attr in attr_list: + assert getattr(T, attr) == getattr(depickled_T, attr) def test_pickle_importable_typevar(self): from .mypkg import T T1 = pickle_depickle(T, protocol=self.protocol) assert T1 is T + # Standard Library TypeVar + from typing import AnyStr + assert AnyStr is pickle_depickle(AnyStr, protocol=self.protocol) + class Protocol2CloudPickleTest(CloudPickleTest): From 8a7d319cb4ec711d08d81b6ce688aa4ce85efcdd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 14 Mar 2020 14:03:23 +0100 Subject: [PATCH 6/7] flag version specific code with coverage pragmas --- cloudpickle/cloudpickle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 5f0812dd4..5c143963d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -142,7 +142,7 @@ def _whichmodule(obj, name): return None -if sys.version_info[:2] < (3, 7): +if sys.version_info[:2] < (3, 7): # pragma: no branch # Workaround bug in old Python versions: prior to Python 3.7, T.__module__ # would always be set to "typing" even when the TypeVar T would be defined # in a different module. From 122ed0e826de13836053071a57c7dc96e7e67ff6 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sat, 14 Mar 2020 14:28:17 +0100 Subject: [PATCH 7/7] CI trigger