Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 44 additions & 80 deletions python/pyarrow/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import numpy as np

from pyarrow import serialize_pandas, deserialize_pandas
from pyarrow.compat import builtin_pickle
from pyarrow.lib import _default_serialization_context, frombuffer

Expand Down Expand Up @@ -61,6 +60,48 @@ def _load_pickle_from_buffer(data):
_deserialize_numpy_array_pickle = _load_pickle_from_buffer


# ----------------------------------------------------------------------
# pandas-specific serialization matters

def _register_custom_pandas_handlers(context):
    """Register pandas Series, Index and DataFrame handlers on ``context``.

    ARROW-1784: faster path for pandas-only visibility.  Series and
    DataFrame objects go through ``pyarrow.pandas_compat``; Index objects
    go through the module's pickle-to-buffer helpers.  Nothing is
    registered when pandas cannot be imported.
    """
    try:
        import pandas as pd
    except ImportError:
        # pandas is unavailable, so there is nothing to register.
        return

    import pyarrow.pandas_compat as pdcompat

    def _dump_frame(obj):
        return pdcompat.dataframe_to_serialized_dict(obj)

    def _load_frame(data):
        return pdcompat.serialized_dict_to_dataframe(data)

    def _dump_series(obj):
        # A Series is wrapped in a one-column DataFrame keyed by its name.
        single_column = pd.DataFrame({obj.name: obj})
        return _dump_frame(single_column)

    def _load_series(data):
        frame = _load_frame(data)
        first_label = frame.columns[0]
        return frame[first_label]

    # Registration order is kept as Series, Index, DataFrame.
    context.register_type(
        pd.Series,
        'pd.Series',
        custom_serializer=_dump_series,
        custom_deserializer=_load_series,
    )
    context.register_type(
        pd.Index,
        'pd.Index',
        custom_serializer=_pickle_to_buffer,
        custom_deserializer=_load_pickle_from_buffer,
    )
    context.register_type(
        pd.DataFrame,
        'pd.DataFrame',
        custom_serializer=_dump_frame,
        custom_deserializer=_load_frame,
    )


def register_default_serialization_handlers(serialization_context):

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -136,90 +177,13 @@ def _deserialize_torch_tensor(data):
# no torch
pass


register_default_serialization_handlers(_default_serialization_context)
_register_custom_pandas_handlers(serialization_context)


# ----------------------------------------------------------------------
# pandas-specific serialization matters

# Register the default handlers on the shared default context first, then
# clone that context and bind the copy to ``pandas_serialization_context``.
register_default_serialization_handlers(_default_serialization_context)

pandas_serialization_context = _default_serialization_context.clone()


def _register_pandas_arrow_handlers(context):
    """Register ``serialize_pandas``-based Series/DataFrame handlers.

    Both types are routed through ``serialize_pandas`` and
    ``deserialize_pandas``.  Nothing is registered when pandas cannot be
    imported.
    """
    try:
        import pandas as pd
    except ImportError:
        # pandas is unavailable, so there is nothing to register.
        return

    def _dump_frame(obj):
        return serialize_pandas(obj)

    def _load_frame(data):
        return deserialize_pandas(data)

    def _dump_series(obj):
        # A Series is wrapped in a one-column DataFrame keyed by its name.
        single_column = pd.DataFrame({obj.name: obj})
        return serialize_pandas(single_column)

    def _load_series(data):
        frame = deserialize_pandas(data)
        first_label = frame.columns[0]
        return frame[first_label]

    # Registration order is kept as Series, then DataFrame.
    context.register_type(
        pd.Series,
        'pd.Series',
        custom_serializer=_dump_series,
        custom_deserializer=_load_series,
    )
    context.register_type(
        pd.DataFrame,
        'pd.DataFrame',
        custom_serializer=_dump_frame,
        custom_deserializer=_load_frame,
    )


def _register_custom_pandas_handlers(context):
    """Register pandas Series, Index and DataFrame handlers on ``context``.

    ARROW-1784: faster path for pandas-only visibility.  Series and
    DataFrame objects go through ``pyarrow.pandas_compat``; Index objects
    go through the module's pickle-to-buffer helpers.  Nothing is
    registered when pandas cannot be imported.
    """
    try:
        import pandas as pd
    except ImportError:
        # pandas is unavailable, so there is nothing to register.
        return

    import pyarrow.pandas_compat as pdcompat

    def _dump_frame(obj):
        return pdcompat.dataframe_to_serialized_dict(obj)

    def _load_frame(data):
        return pdcompat.serialized_dict_to_dataframe(data)

    def _dump_series(obj):
        # A Series is wrapped in a one-column DataFrame keyed by its name.
        single_column = pd.DataFrame({obj.name: obj})
        return _dump_frame(single_column)

    def _load_series(data):
        frame = _load_frame(data)
        first_label = frame.columns[0]
        return frame[first_label]

    # Registration order is kept as Series, Index, DataFrame.
    context.register_type(
        pd.Series,
        'pd.Series',
        custom_serializer=_dump_series,
        custom_deserializer=_load_series,
    )
    context.register_type(
        pd.Index,
        'pd.Index',
        custom_serializer=_pickle_to_buffer,
        custom_deserializer=_load_pickle_from_buffer,
    )
    context.register_type(
        pd.DataFrame,
        'pd.DataFrame',
        custom_serializer=_dump_frame,
        custom_deserializer=_load_frame,
    )


# The default context receives the serialize_pandas/deserialize_pandas based
# handlers, while the pandas-specific context receives the
# pyarrow.pandas_compat based handlers (ARROW-1784).
_register_pandas_arrow_handlers(_default_serialization_context)
_register_custom_pandas_handlers(pandas_serialization_context)


pandas_serialization_context.register_type(
np.ndarray, 'np.array',
custom_serializer=_serialize_numpy_array_pickle,
Expand Down