Skip to content

Commit 1b98fb3

Browse files
committed
serialize itemgetter/attrgetter in portable ways
1 parent 3c1dbfe commit 1b98fb3

File tree

1 file changed

+33
-46
lines changed

1 file changed

+33
-46
lines changed

python/pyspark/cloudpickle.py

Lines changed: 33 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -70,20 +70,6 @@
7070
import logging
7171
cloudLog = logging.getLogger("Cloud.Transport")
7272

73-
if PyImp == "CPython":
74-
try:
75-
import ctypes
76-
except (MemoryError, ImportError):
77-
logging.warning('Exception raised on importing ctypes. Likely python bug.. some functionality will be disabled', exc_info = True)
78-
ctypes = None
79-
PyObject_HEAD = None
80-
else:
81-
82-
# for reading internal structures
83-
PyObject_HEAD = [
84-
('ob_refcnt', ctypes.c_size_t),
85-
('ob_type', ctypes.c_void_p),
86-
]
8773

8874
if PyImp == "PyPy":
8975
# register builtin type in `new`
@@ -232,6 +218,8 @@ def save_function(self, obj, name=None, pack=struct.pack):
232218

233219
if themodule:
234220
self.modules.add(themodule)
221+
if getattr(themodule, name, None) is obj:
222+
return self.save_global(obj, name)
235223

236224
if not self.savedDjangoEnv:
237225
#hack for django - if we detect the settings module, we transport it
@@ -402,6 +390,12 @@ def get_contents(cell):
402390

403391
return (code, f_globals, defaults, closure, dct, base_globals)
404392

393+
def save_builtin_function(self, obj):
    """Pickle a builtin (C-implemented) function.

    Functions whose ``__module__`` equals ``"__builtin__"`` are written by
    reference through ``save_global``; every other builtin function is
    handed to ``save_function``.

    Returns whatever the chosen saver returns.
    """
    # Compare by value, not identity: ``is`` only succeeds when both sides
    # are the very same string object, which is not guaranteed for a
    # module name attached to a function at runtime.
    if obj.__module__ == "__builtin__":
        return self.save_global(obj)
    return self.save_function(obj)
397+
# Objects of type types.BuiltinFunctionType are dispatched to save_builtin_function.
dispatch[types.BuiltinFunctionType] = save_builtin_function
398+
405399
def save_global(self, obj, name=None, pack=struct.pack):
406400
write = self.write
407401
memo = self.memo
@@ -486,7 +480,6 @@ def save_global(self, obj, name=None, pack=struct.pack):
486480
write(pickle.GLOBAL + modname + '\n' + name + '\n')
487481
self.memoize(obj)
488482
dispatch[types.ClassType] = save_global
489-
dispatch[types.BuiltinFunctionType] = save_global
490483
dispatch[types.TypeType] = save_global
491484

492485
def save_instancemethod(self, obj):
@@ -556,37 +549,31 @@ def save_property(self, obj):
556549
self.save_reduce(property, (obj.fget, obj.fset, obj.fdel, obj.__doc__), obj=obj)
557550
dispatch[property] = save_property
558551

559-
if PyImp == "CPython":
560-
def save_itemgetter(self, obj):
561-
"""itemgetter serializer (needed for namedtuple support)
562-
a bit of a pain as we need to read ctypes internals"""
563-
class ItemGetterType(ctypes.Structure):
564-
_fields_ = PyObject_HEAD + [
565-
('nitems', ctypes.c_size_t),
566-
('item', ctypes.py_object)
567-
]
568-
569-
obj = ctypes.cast(ctypes.c_void_p(id(obj)), ctypes.POINTER(ItemGetterType)).contents
570-
return self.save_reduce(operator.itemgetter,
571-
obj.item if obj.nitems > 1 else (obj.item,))
572-
573-
if PyObject_HEAD:
574-
dispatch[operator.itemgetter] = save_itemgetter
575-
576-
def save_attrgetter(self, obj):
577-
"""attrgetter serializer"""
578-
class AttrGetterType(ctypes.Structure):
579-
_fields_ = PyObject_HEAD + [
580-
('nattrs', ctypes.c_size_t),
581-
('attr', ctypes.py_object)
582-
]
583-
584-
obj = ctypes.cast(ctypes.c_void_p(id(obj)), ctypes.POINTER(AttrGetterType)).contents
585-
return self.save_reduce(operator.attrgetter,
586-
obj.attr if obj.nattrs > 1 else (obj.attr,))
587-
588-
if PyObject_HEAD:
589-
dispatch[operator.attrgetter] = save_attrgetter
552+
def save_itemgetter(self, obj):
    """itemgetter serializer (needed for namedtuple support).

    The keys held by ``obj`` are recovered portably by calling it on a
    probe object whose ``__getitem__`` records every key it is asked for.
    The getter is then pickled as ``operator.itemgetter(*keys)`` via
    ``save_reduce``.

    Returns whatever ``self.save_reduce`` returns.
    """
    keys = []

    class _KeyRecorder(object):
        def __getitem__(self, item):
            # Record each lookup in call order. Inspecting the getter's
            # return value instead cannot tell a single tuple-valued key,
            # e.g. itemgetter((1, 2)), apart from several keys.
            keys.append(item)
            return item

    obj(_KeyRecorder())
    return self.save_reduce(operator.itemgetter, tuple(keys))
561+
562+
# Register save_itemgetter as the dispatch entry for itemgetter objects,
# but only when operator.itemgetter is itself a type.
# NOTE(review): presumably this skips interpreters where itemgetter is not
# exposed as a class - confirm.
if type(operator.itemgetter) is type:
563+
dispatch[operator.itemgetter] = save_itemgetter
564+
565+
def save_attrgetter(self, obj):
    """attrgetter serializer.

    The attribute names held by ``obj`` are recovered portably by calling
    it on a recording proxy. Each attribute access on the root proxy
    starts a new name; each access on a proxy returned from an earlier
    access extends that name with a dot. Dotted getters such as
    ``attrgetter("a.b", "c")`` are therefore rebuilt as ``("a.b", "c")``.
    The getter is then pickled as ``operator.attrgetter(*names)`` via
    ``save_reduce``.

    Returns whatever ``self.save_reduce`` returns.
    """
    class _AttrRecorder(object):
        def __init__(self, names, index=None):
            self.names = names
            self.index = index

        def __getattribute__(self, item):
            # Go through object.__getattribute__ so reading the recorder's
            # own state does not get recorded as a requested attribute.
            names = object.__getattribute__(self, "names")
            index = object.__getattribute__(self, "index")
            if index is None:
                # Access on the root proxy: start a new top-level name.
                index = len(names)
                names.append(item)
            else:
                # Access on a nested proxy: extend the dotted path.
                names[index] = ".".join([names[index], item])
            return type(self)(names, index)

    names = []
    obj(_AttrRecorder(names))
    return self.save_reduce(operator.attrgetter, tuple(names))
574+
575+
# Register save_attrgetter as the dispatch entry for attrgetter objects,
# but only when operator.attrgetter is itself a type.
# NOTE(review): presumably this skips interpreters where attrgetter is not
# exposed as a class - confirm.
if type(operator.attrgetter) is type:
576+
dispatch[operator.attrgetter] = save_attrgetter
590577

591578
def save_reduce(self, func, args, state=None,
592579
listitems=None, dictitems=None, obj=None):

0 commit comments

Comments
 (0)