From 4093b026b7727d1986561e16bffd8498c9609379 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 20 Jan 2020 18:04:38 +0100 Subject: [PATCH 01/11] FIX check the type of the entries in sys.modules Some modules such as coverage can inject some non-modules objects inside sys.modules, creating unpredictable bugs while trying to infer the module of a function. --- cloudpickle/cloudpickle.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 1d4c85cbe..a82b73f2d 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -154,7 +154,13 @@ def _whichmodule(obj, name): # Protect the iteration by using a list copy of sys.modules against dynamic # modules that trigger imports of other modules upon calls to getattr. for module_name, module in list(sys.modules.items()): - if module_name == '__main__' or module is None: + # Some modules such as coverage can inject non-module objects inside + # sys.modules + if ( + module_name == '__main__' or + module is None or + not isinstance(module, types.ModuleType) + ): continue try: if _getattribute(module, name)[0] is obj: From 8348cffe5271caacd77ddfc9b8336079ef28f91b Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 26 Jan 2020 18:15:42 +0100 Subject: [PATCH 02/11] add a test of a faulty module tricking whichmodule --- tests/cloudpickle_test.py | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 8bd210abe..4daa579b5 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1048,6 +1048,50 @@ def __init__(self, x): self.assertEqual(set(weakset), {depickled1, depickled2}) + def test_non_module_object_passing_whichmodule_test(self): + # https://github.com/cloudpipe/cloudpickle/pull/326: cloudpickle should + # not try to instrospect non-modules object when trying to discover the + # module of a function/class + def func(x): + return x ** 2 + + func.__module__ = "NonModuleObject" + + class NonModuleObject(object): + def __getattr__(self, name): + # We whitelist func so that a _whichmodule(func, None) call returns + # the NonModuleObject instance if a type check on the entries + # of sys.modules is not carried out, but manipulating this + # instance thinking it really is a module later on in the + # pickling process of func errors out + if name == 'func': + return func + else: + raise ValueError + + non_module_object = NonModuleObject() + + assert func(2) == 4 + assert func is non_module_object.func + + # Any manipulation of non_module_object relying on attribute access + # will raise an Exception + with pytest.raises(ValueError): + _is_dynamic(non_module_object) + + try: + sys.modules['NonModuleObject'] = non_module_object + + func_module_name = _whichmodule(func, None) + assert func_module_name != 'NonModuleObject' + assert func_module_name is None + + depickled_func = pickle_depickle(func, protocol=self.protocol) + assert depickled_func(2) == 4 + + finally: + sys.modules.pop('NonModuleObject') + def test_faulty_module(self): for module_name in ['_missing_module', None]: class FaultyModule(object): From f746c5dbdac790ecf9284e9907c67a82946dc353 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 26 Jan 2020 18:24:28 +0100 Subject: [PATCH 03/11] also test faulty modules that are module instances --- tests/cloudpickle_test.py | 52 ++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 4daa579b5..d2b0544ce 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1093,34 +1093,40 @@ def __getattr__(self, name): sys.modules.pop('NonModuleObject') def test_faulty_module(self): - for module_name in ['_missing_module', None]: - class FaultyModule(object): - def __getattr__(self, name): - # This throws an exception while looking up within - # pickle.whichmodule or getattr(module, name, None) - raise Exception() + for base_class in (object, types.ModuleType): + for module_name in ['_missing_module', None]: + class FaultyModule(base_class): + def __getattr__(self, name): + # This throws an exception while looking up within + # pickle.whichmodule or getattr(module, name, None) + raise Exception() - class Foo(object): - __module__ = module_name + class Foo(object): + __module__ = module_name - def foo(self): - return "it works!" - - def foo(): - return "it works!" + def foo(self): + return "it works!" - foo.__module__ = module_name + def foo(): + return "it works!" - sys.modules["_faulty_module"] = FaultyModule() - try: - # Test whichmodule in save_global. - self.assertEqual(pickle_depickle(Foo()).foo(), "it works!") + foo.__module__ = module_name - # Test whichmodule in save_function. - cloned = pickle_depickle(foo, protocol=self.protocol) - self.assertEqual(cloned(), "it works!") - finally: - sys.modules.pop("_faulty_module", None) + if base_class is types.ModuleType: # noqa + faulty_module = FaultyModule('_faulty_module') + else: + faulty_module = FaultyModule() + sys.modules["_faulty_module"] = faulty_module + + try: + # Test whichmodule in save_global. + self.assertEqual(pickle_depickle(Foo()).foo(), "it works!") + + # Test whichmodule in save_function. + cloned = pickle_depickle(foo, protocol=self.protocol) + self.assertEqual(cloned(), "it works!") + finally: + sys.modules.pop("_faulty_module", None) def test_dynamic_pytest_module(self): # Test case for pull request https://github.com/cloudpipe/cloudpickle/pull/116 From 5da8b99c169c6bb8812784957fad2ed813c8a9c4 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 26 Jan 2020 18:32:53 +0100 Subject: [PATCH 04/11] missing import statement --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d2b0544ce..0fc9c4fb9 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -42,7 +42,7 @@ import cloudpickle from cloudpickle.cloudpickle import _is_dynamic from cloudpickle.cloudpickle import _make_empty_cell, cell_set -from cloudpickle.cloudpickle import _extract_class_dict +from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from .testutils import subprocess_pickle_echo from .testutils import assert_run_python_script From 773f0d114620427e94d9adeda62743be12e0cecd Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Sun, 26 Jan 2020 18:54:23 +0100 Subject: [PATCH 05/11] FIX hide func's __module__ --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 0fc9c4fb9..33638c926 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1055,7 +1055,7 @@ def test_non_module_object_passing_whichmodule_test(self): def func(x): return x ** 2 - func.__module__ = "NonModuleObject" + func.__module__ = None class NonModuleObject(object): def __getattr__(self, name): From 4e82270f55c80c2e1c4ba2acdb7988b231be4f1e Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 27 Jan 2020 11:42:48 +0100 Subject: [PATCH 06/11] Update tests/cloudpickle_test.py --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 33638c926..d6bc796d4 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1067,7 +1067,7 @@ def __getattr__(self, name): if name == 'func': return func else: - raise ValueError + raise AttributeError non_module_object = NonModuleObject() From 2ddf9620789a6dfd1f73445baf27aa76968f4b87 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 27 Jan 2020 11:43:52 +0100 Subject: [PATCH 07/11] Update tests/cloudpickle_test.py --- tests/cloudpickle_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index d6bc796d4..2b9e2a986 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1076,7 +1076,7 @@ def __getattr__(self, name): # Any manipulation of non_module_object relying on attribute access # will raise an Exception - with pytest.raises(ValueError): + with pytest.raises(AttributeError): _is_dynamic(non_module_object) try: From f416909b87fb74a5489013064ff4729c1cdcbbc3 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 27 Jan 2020 12:13:30 +0100 Subject: [PATCH 08/11] add more inline comments describing the issue --- tests/cloudpickle_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 2b9e2a986..400dccf6a 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1051,10 +1051,15 @@ def __init__(self, x): def test_non_module_object_passing_whichmodule_test(self): # https://github.com/cloudpipe/cloudpickle/pull/326: cloudpickle should # not try to instrospect non-modules object when trying to discover the - # module of a function/class + # module of a function/class. This happenened because codecov injects + # tuples (and not modules) into sys.modules, but type-checks where not + # carried out on the entries of sys.modules, causing cloupdickle to + # then error in unexpected ways def func(x): return x ** 2 + # trigger a loop during the execution of whichmodule(func) by + # explicitly setting the function's module to None func.__module__ = None class NonModuleObject(object): From bbbdfab28023f7cf10e2dbd5513929c67e923ca4 Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 27 Jan 2020 16:10:52 +0100 Subject: [PATCH 09/11] MNT update changelog --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 79043afe4..0f57b66a5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,10 @@ 1.2.3 ===== +- Fix a bug affecting cloudpickle when non-modules objects are added into + sys.modules + ([PR #326](https://github.com/cloudpipe/cloudpickle/pull/326)). + - Fix a bug when a thread imports a module while cloudpickle iterates over the module list ([PR #322](https://github.com/cloudpipe/cloudpickle/pull/322)). From 090a3b3f12c1f76a700cc8ed9e9c27c3d25aa64c Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Mon, 27 Jan 2020 17:07:39 +0100 Subject: [PATCH 10/11] DOC more test comments --- tests/cloudpickle_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index 400dccf6a..a0ad164da 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1097,7 +1097,11 @@ def __getattr__(self, name): finally: sys.modules.pop('NonModuleObject') - def test_faulty_module(self): + def test_unrelated_faulty_module(self): + # Check that pickling a dynamically defined function or class does not + # fail when introspecting the currently loaded modules in sys.modules + # as long as those faulty modules are unrelated to the class or + # function we are currently pickling. for base_class in (object, types.ModuleType): for module_name in ['_missing_module', None]: class FaultyModule(base_class): From 2d5e60184612966bf4d85ff9bf8a30ef91cd290a Mon Sep 17 00:00:00 2001 From: Pierre Glaser Date: Tue, 28 Jan 2020 16:37:10 +0100 Subject: [PATCH 11/11] CLN typos --- tests/cloudpickle_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index a0ad164da..6394305d1 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -1052,13 +1052,13 @@ def test_non_module_object_passing_whichmodule_test(self): # https://github.com/cloudpipe/cloudpickle/pull/326: cloudpickle should # not try to instrospect non-modules object when trying to discover the # module of a function/class. This happenened because codecov injects - # tuples (and not modules) into sys.modules, but type-checks where not + # tuples (and not modules) into sys.modules, but type-checks were not # carried out on the entries of sys.modules, causing cloupdickle to # then error in unexpected ways def func(x): return x ** 2 - # trigger a loop during the execution of whichmodule(func) by + # Trigger a loop during the execution of whichmodule(func) by # explicitly setting the function's module to None func.__module__ = None