Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion python/pyspark/cloudpickle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@
from pyspark.cloudpickle.cloudpickle import * # noqa
from pyspark.cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa

__version__ = '1.5.0'
# Conform to the convention used by python serialization libraries, which
# expose their Pickler subclass at top-level under the "Pickler" name.
Pickler = CloudPickler

__version__ = '1.6.0'
22 changes: 17 additions & 5 deletions python/pyspark/cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def g():
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# reconstruct instances from the matching singleton class definition when
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo?

# appropriate and preserve the usual "isinstance" semantics of Python objects.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
Expand Down Expand Up @@ -236,7 +236,7 @@ def _extract_code_globals(co):
out_names = {names[oparg] for _, oparg in _walk_global_ops(co)}

# Declaring a function inside another one using the "def ..."
# syntax generates a constant code object corresponding to the one
# syntax generates a constant code object corresponding to the one
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its a copy from cloudpickle. I think I would just keep it as is.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cloudpipe/cloudpickle#406. Let's fix it back when we upgrade next time.

# of the nested function's As the nested function may itself need
# global variables, we need to introspect its code, extract its
# globals, (look for code object in it's co_consts attribute..) and
Expand Down Expand Up @@ -457,7 +457,7 @@ def _is_parametrized_type_hint(obj):
is_typing = getattr(obj, '__origin__', None) is not None

# typing_extensions.Literal
is_literal = getattr(obj, '__values__', None) is not None
is_litteral = getattr(obj, '__values__', None) is not None

# typing_extensions.Final
is_final = getattr(obj, '__type__', None) is not None
Expand All @@ -469,7 +469,7 @@ def _is_parametrized_type_hint(obj):
getattr(obj, '__result__', None) is not None and
getattr(obj, '__args__', None) is not None
)
return any((is_typing, is_literal, is_final, is_union, is_tuple,
return any((is_typing, is_litteral, is_final, is_union, is_tuple,
is_callable))

def _create_parametrized_type_hint(origin, args):
Expand Down Expand Up @@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None):
"""
# This function is deprecated and should be removed in cloudpickle 1.7
warnings.warn(
"A pickle file created using an old (<=1.4.1) version of cloudpickle "
"A pickle file created using an old (<=1.4.1) version of cloudpicke "
"is currently being loaded. This is not supported by cloudpickle and "
"will break in cloudpickle 1.7", category=UserWarning
)
Expand Down Expand Up @@ -828,3 +828,15 @@ def _get_bases(typ):
# For regular class objects
bases_attr = '__bases__'
return getattr(typ, bases_attr)


def _make_dict_keys(obj):
return dict.fromkeys(obj).keys()


def _make_dict_values(obj):
return {i: _ for i, _ in enumerate(obj)}.values()


def _make_dict_items(obj):
return obj.items()
37 changes: 30 additions & 7 deletions python/pyspark/cloudpickle/cloudpickle_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
is only available for Python versions 3.8+, a lot of backward-compatibility
code is also removed.

Note that the C Pickler subclassing API is CPython-specific. Therefore, some
Note that the C Pickler subclassing API is CPython-specific. Therefore, some
guards present in cloudpickle.py that were written to handle PyPy specificities
are not present in cloudpickle_fast.py
"""
import _collections_abc
import abc
import copyreg
import io
Expand All @@ -33,8 +34,8 @@
_typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType,
_is_parametrized_type_hint, PYPY, cell_set,
parametrized_type_hint_getinitargs, _create_parametrized_type_hint,
builtin_code_type

builtin_code_type,
_make_dict_keys, _make_dict_values, _make_dict_items,
)


Expand Down Expand Up @@ -179,7 +180,7 @@ def _class_getstate(obj):
clsdict.pop('__weakref__', None)

if issubclass(type(obj), abc.ABCMeta):
# If obj is an instance of an ABCMeta subclass, don't pickle the
# If obj is an instance of an ABCMeta subclass, don't pickle the
# cache/negative caches populated during isinstance/issubclass
# checks, but pickle the list of registered subclasses of obj.
clsdict.pop('_abc_cache', None)
Expand Down Expand Up @@ -400,14 +401,32 @@ def _class_reduce(obj):
return NotImplemented


def _dict_keys_reduce(obj):
    """Reducer for ``dict_keys`` views.

    Only the key sequence is serialized — shipping the backing dict as well
    might be unintended and could potentially leak sensitive information,
    so it is safer not to pickle the full mapping.
    """
    keys = list(obj)
    return _make_dict_keys, (keys, )


def _dict_values_reduce(obj):
    """Reducer for ``dict_values`` views.

    Only the value sequence is serialized — shipping the backing dict as
    well might be unintended and could potentially leak sensitive
    information, so it is safer not to pickle the full mapping.
    """
    values = list(obj)
    return _make_dict_values, (values, )


def _dict_items_reduce(obj):
    """Reducer for ``dict_items`` views: rebuilt from a copy of the mapping.

    Unlike the keys/values reducers, an items view already exposes both
    halves of every entry, so pickling the whole mapping adds nothing new.
    """
    mapping = dict(obj)
    return _make_dict_items, (mapping, )


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at unpickling time.


def _function_setstate(obj, state):
"""Update the state of a dynamic function.
"""Update the state of a dynamic function.

As __closure__ and __globals__ are readonly attributes of a function, we
cannot rely on the native setstate routine of pickle.load_build, that calls
Expand Down Expand Up @@ -473,6 +492,10 @@ class CloudPickler(Pickler):
_dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
_dispatch_table[weakref.WeakSet] = _weakset_reduce
_dispatch_table[typing.TypeVar] = _typevar_reduce
_dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
_dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
_dispatch_table[_collections_abc.dict_items] = _dict_items_reduce


dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

Expand Down Expand Up @@ -556,7 +579,7 @@ def dump(self, obj):
# `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler
# used `CloudPickler.dispatch` as a class-level attribute storing all
# reducers implemented by cloudpickle, but the attribute name was not a
# great choice given the meaning of `CloudPickler.dispatch` when
# great choice given the meaning of `CloudPickler.dispatch` when
# `CloudPickler` extends the pure-python pickler.
dispatch = dispatch_table

Expand Down Expand Up @@ -630,7 +653,7 @@ def reducer_override(self, obj):
return self._function_reduce(obj)
else:
# fallback to save_global, including the Pickler's
# dispatch_table
# dispatch_table
return NotImplemented

else:
Expand Down