diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py index 689ec15d329..61f2e83f319 100644 --- a/python/pyarrow/serialization.py +++ b/python/pyarrow/serialization.py @@ -21,7 +21,6 @@ import numpy as np -from pyarrow import serialize_pandas, deserialize_pandas from pyarrow.compat import builtin_pickle from pyarrow.lib import _default_serialization_context, frombuffer @@ -61,6 +60,48 @@ def _load_pickle_from_buffer(data): _deserialize_numpy_array_pickle = _load_pickle_from_buffer +# ---------------------------------------------------------------------- +# pandas-specific serialization matters + +def _register_custom_pandas_handlers(context): + # ARROW-1784, faster path for pandas-only visibility + + try: + import pandas as pd + except ImportError: + return + + import pyarrow.pandas_compat as pdcompat + + def _serialize_pandas_dataframe(obj): + return pdcompat.dataframe_to_serialized_dict(obj) + + def _deserialize_pandas_dataframe(data): + return pdcompat.serialized_dict_to_dataframe(data) + + def _serialize_pandas_series(obj): + return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj})) + + def _deserialize_pandas_series(data): + deserialized = _deserialize_pandas_dataframe(data) + return deserialized[deserialized.columns[0]] + + context.register_type( + pd.Series, 'pd.Series', + custom_serializer=_serialize_pandas_series, + custom_deserializer=_deserialize_pandas_series) + + context.register_type( + pd.Index, 'pd.Index', + custom_serializer=_pickle_to_buffer, + custom_deserializer=_load_pickle_from_buffer) + + context.register_type( + pd.DataFrame, 'pd.DataFrame', + custom_serializer=_serialize_pandas_dataframe, + custom_deserializer=_deserialize_pandas_dataframe) + + def register_default_serialization_handlers(serialization_context): # ---------------------------------------------------------------------- @@ -136,90 +177,13 @@ def _deserialize_torch_tensor(data): # no torch pass - -register_default_serialization_handlers(_default_serialization_context) + _register_custom_pandas_handlers(serialization_context) -# ---------------------------------------------------------------------- -# pandas-specific serialization matters - +register_default_serialization_handlers(_default_serialization_context) pandas_serialization_context = _default_serialization_context.clone() - -def _register_pandas_arrow_handlers(context): - try: - import pandas as pd - except ImportError: - return - - def _serialize_pandas_series(obj): - return serialize_pandas(pd.DataFrame({obj.name: obj})) - - def _deserialize_pandas_series(data): - deserialized = deserialize_pandas(data) - return deserialized[deserialized.columns[0]] - - def _serialize_pandas_dataframe(obj): - return serialize_pandas(obj) - - def _deserialize_pandas_dataframe(data): - return deserialize_pandas(data) - - context.register_type( - pd.Series, 'pd.Series', - custom_serializer=_serialize_pandas_series, - custom_deserializer=_deserialize_pandas_series) - - context.register_type( - pd.DataFrame, 'pd.DataFrame', - custom_serializer=_serialize_pandas_dataframe, - custom_deserializer=_deserialize_pandas_dataframe) - - -def _register_custom_pandas_handlers(context): - # ARROW-1784, faster path for pandas-only visibility - - try: - import pandas as pd - except ImportError: - return - - import pyarrow.pandas_compat as pdcompat - - def _serialize_pandas_dataframe(obj): - return pdcompat.dataframe_to_serialized_dict(obj) - - def _deserialize_pandas_dataframe(data): - return pdcompat.serialized_dict_to_dataframe(data) - - def _serialize_pandas_series(obj): - return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj})) - - def _deserialize_pandas_series(data): - deserialized = _deserialize_pandas_dataframe(data) - return deserialized[deserialized.columns[0]] - - context.register_type( - pd.Series, 'pd.Series', - custom_serializer=_serialize_pandas_series, - custom_deserializer=_deserialize_pandas_series) - - context.register_type( - pd.Index, 'pd.Index', - custom_serializer=_pickle_to_buffer, - custom_deserializer=_load_pickle_from_buffer) - - context.register_type( - pd.DataFrame, 'pd.DataFrame', - custom_serializer=_serialize_pandas_dataframe, - custom_deserializer=_deserialize_pandas_dataframe) - - -_register_pandas_arrow_handlers(_default_serialization_context) -_register_custom_pandas_handlers(pandas_serialization_context) - - pandas_serialization_context.register_type( np.ndarray, 'np.array', custom_serializer=_serialize_numpy_array_pickle,