Skip to content
Closed
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,14 +686,15 @@ def dataframe_to_serialized_dict(frame):

def serialized_dict_to_dataframe(data):
    """
    Rebuild a pandas DataFrame from a serialized dictionary.

    Parameters
    ----------
    data : dict
        Mapping with an 'axes' entry (passed unchanged to the pandas
        BlockManager) and a 'blocks' entry (an iterable of block items,
        each handed to ``_reconstruct_block``).

    Returns
    -------
    The object returned by ``_pandas_api.data_frame`` for the rebuilt
    BlockManager.
    """
    import pandas.core.internals as _int

    # The number of axes is forwarded as the dimensionality of every
    # reconstructed block.
    ndim = len(data['axes'])
    reconstructed_blocks = [_reconstruct_block(block, ndim=ndim)
                            for block in data['blocks']]

    block_mgr = _int.BlockManager(reconstructed_blocks, data['axes'])
    return _pandas_api.data_frame(block_mgr)


def _reconstruct_block(item, columns=None, extension_columns=None):
def _reconstruct_block(item, ndim, columns=None, extension_columns=None):
"""
Construct a pandas Block from the `item` dictionary coming from pyarrow's
serialization or returned by arrow::python::ConvertTableToPandas.
Expand All @@ -709,6 +710,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
{'block': np.ndarray of values, 'placement': pandas block placement}.
Additional keys are present for other types (dictionary, timezone,
object).
ndim : int
Dimension of the Block under construction.
columns :
Column names of the table being constructed, used for extension types
extension_columns : dict
Expand All @@ -730,15 +733,26 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
block_arr, categories=item['dictionary'],
ordered=item['ordered'])
block = _int.make_block(cat, placement=placement,
klass=_int.CategoricalBlock)
klass=_int.CategoricalBlock, ndim=ndim)
elif 'timezone' in item:
dtype = make_datetimetz(item['timezone'])
# TODO: once older pandas is dropped, use dtype.construct_array_type()
# instead of hard-coding DatetimeArray
try:
from pandas.arrays import DatetimeArray
block_arr = DatetimeArray(block_arr, dtype=dtype)
except ImportError:
# older pandas versions
from pandas import DatetimeIndex
block_arr = DatetimeIndex(block_arr, dtype=dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also fails on older pandas. I assume that is because DatetimeArray and DatetimeIndex might interpret datetime64 values differently (wall time vs. Unix time?). I didn't check, but I remember discussions about it.


block = _int.make_block(block_arr, placement=placement,
klass=_int.DatetimeTZBlock,
dtype=dtype)
klass=_int.DatetimeTZBlock)
elif 'object' in item:
block = _int.make_block(builtin_pickle.loads(block_arr),
placement=placement, klass=_int.ObjectBlock)
block_arr = builtin_pickle.loads(block_arr)
block = _int.make_block(block_arr,
placement=placement,
klass=_int.ObjectBlock, ndim=ndim)
elif 'py_array' in item:
# create ExtensionBlock
arr = item['py_array']
Expand All @@ -750,9 +764,9 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
"to a pandas ExtensionArray")
pd_ext_arr = pandas_dtype.__from_arrow__(arr)
block = _int.make_block(pd_ext_arr, placement=placement,
klass=_int.ExtensionBlock)
klass=_int.ExtensionBlock, ndim=ndim)
else:
block = _int.make_block(block_arr, placement=placement)
block = _int.make_block(block_arr, placement=placement, ndim=ndim)

return block

Expand Down Expand Up @@ -1130,7 +1144,7 @@ def _table_to_blocks(options, block_table, categories, extension_columns):
columns = block_table.column_names
result = pa.lib.table_to_blocks(options, block_table, categories,
list(extension_columns.keys()))
return [_reconstruct_block(item, columns, extension_columns)
return [_reconstruct_block(item, 2, columns, extension_columns)
for item in result]


Expand Down