Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions python/pyarrow/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,14 +686,15 @@ def dataframe_to_serialized_dict(frame):

def serialized_dict_to_dataframe(data):
import pandas.core.internals as _int
reconstructed_blocks = [_reconstruct_block(block)
ndim = len(data['axes'])
reconstructed_blocks = [_reconstruct_block(block, ndim=ndim)
for block in data['blocks']]

block_mgr = _int.BlockManager(reconstructed_blocks, data['axes'])
return _pandas_api.data_frame(block_mgr)


def _reconstruct_block(item, columns=None, extension_columns=None):
def _reconstruct_block(item, ndim, columns=None, extension_columns=None):
"""
Construct a pandas Block from the `item` dictionary coming from pyarrow's
serialization or returned by arrow::python::ConvertTableToPandas.
Expand All @@ -709,6 +710,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
{'block': np.ndarray of values, 'placement': pandas block placement}.
Additional keys are present for other types (dictionary, timezone,
object).
ndim : int
Dimension of the Block under construction.
columns :
Column names of the table being constructed, used for extension types
extension_columns : dict
Expand All @@ -730,15 +733,18 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
block_arr, categories=item['dictionary'],
ordered=item['ordered'])
block = _int.make_block(cat, placement=placement,
klass=_int.CategoricalBlock)
klass=_int.CategoricalBlock, ndim=ndim)
elif 'timezone' in item:
dtype = make_datetimetz(item['timezone'])
cls = dtype.construct_array_type()
block_arr = cls._simple_new(block_arr, dtype=dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line seems to cause the failures on the latest pandas, so if the previous approach was working, I would keep it that way. (In addition, although pyarrow is already using private APIs here, `_simple_new` is even more of a private implementation detail, so I would prefer not to use it.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition, the `dtype.construct_array_type()` line is failing on older pandas releases, so reverting this change might solve that as well.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, the motivation here was to stop passing `dtype` to `make_block`, since that kwarg isn't used within pandas and, AFAICT, this usage is the blocker to deprecating/removing it. This could be considered separate from the matter at hand, but I wanted to address it while I'm in the neighborhood.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I understand.
Now, the next problem: `DatetimeArray` is also not yet available on older pandas versions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(We could now do something different depending on the pandas version.)

block = _int.make_block(block_arr, placement=placement,
klass=_int.DatetimeTZBlock,
dtype=dtype)
klass=_int.DatetimeTZBlock)
elif 'object' in item:
block = _int.make_block(builtin_pickle.loads(block_arr),
placement=placement, klass=_int.ObjectBlock)
block_arr = builtin_pickle.loads(block_arr)
block = _int.make_block(block_arr,
placement=placement,
klass=_int.ObjectBlock, ndim=ndim)
elif 'py_array' in item:
# create ExtensionBlock
arr = item['py_array']
Expand All @@ -750,9 +756,9 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
"to a pandas ExtensionArray")
pd_ext_arr = pandas_dtype.__from_arrow__(arr)
block = _int.make_block(pd_ext_arr, placement=placement,
klass=_int.ExtensionBlock)
klass=_int.ExtensionBlock, ndim=ndim)
else:
block = _int.make_block(block_arr, placement=placement)
block = _int.make_block(block_arr, placement=placement, ndim=ndim)

return block

Expand Down Expand Up @@ -1130,7 +1136,7 @@ def _table_to_blocks(options, block_table, categories, extension_columns):
columns = block_table.column_names
result = pa.lib.table_to_blocks(options, block_table, categories,
list(extension_columns.keys()))
return [_reconstruct_block(item, columns, extension_columns)
return [_reconstruct_block(item, 2, columns, extension_columns)
for item in result]


Expand Down