diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 935f495d7c8..708c9b35bf4 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -686,14 +686,15 @@ def dataframe_to_serialized_dict(frame): def serialized_dict_to_dataframe(data): import pandas.core.internals as _int - reconstructed_blocks = [_reconstruct_block(block) + ndim = len(data['axes']) + reconstructed_blocks = [_reconstruct_block(block, ndim=ndim) for block in data['blocks']] block_mgr = _int.BlockManager(reconstructed_blocks, data['axes']) return _pandas_api.data_frame(block_mgr) -def _reconstruct_block(item, columns=None, extension_columns=None): +def _reconstruct_block(item, ndim, columns=None, extension_columns=None): """ Construct a pandas Block from the `item` dictionary coming from pyarrow's serialization or returned by arrow::python::ConvertTableToPandas. @@ -709,6 +710,8 @@ def _reconstruct_block(item, columns=None, extension_columns=None): {'block': np.ndarray of values, 'placement': pandas block placement}. Additional keys are present for other types (dictionary, timezone, object). + ndim : int + Dimension of the Block under construction. columns : Column names of the table being constructed, used for extension types extension_columns : dict @@ -730,15 +733,26 @@ def _reconstruct_block(item, columns=None, extension_columns=None): block_arr, categories=item['dictionary'], ordered=item['ordered']) block = _int.make_block(cat, placement=placement, - klass=_int.CategoricalBlock) + klass=_int.CategoricalBlock, ndim=ndim) elif 'timezone' in item: dtype = make_datetimetz(item['timezone']) + # TODO: once older pandas is dropped, use dtype.construct_array_type() + # instead of hard-coding DatetimeArray + try: + from pandas.arrays import DatetimeArray + block_arr = DatetimeArray(block_arr, dtype=dtype) + except ImportError: + # older pandas versions + from pandas import DatetimeIndex + block_arr = DatetimeIndex(block_arr).tz_localize(dtype.tz) + block = _int.make_block(block_arr, placement=placement, - klass=_int.DatetimeTZBlock, - dtype=dtype) + klass=_int.DatetimeTZBlock) elif 'object' in item: - block = _int.make_block(builtin_pickle.loads(block_arr), - placement=placement, klass=_int.ObjectBlock) + block_arr = builtin_pickle.loads(block_arr) + block = _int.make_block(block_arr, + placement=placement, + klass=_int.ObjectBlock, ndim=ndim) elif 'py_array' in item: # create ExtensionBlock arr = item['py_array'] @@ -750,9 +764,9 @@ def _reconstruct_block(item, columns=None, extension_columns=None): "to a pandas ExtensionArray") pd_ext_arr = pandas_dtype.__from_arrow__(arr) block = _int.make_block(pd_ext_arr, placement=placement, - klass=_int.ExtensionBlock) + klass=_int.ExtensionBlock, ndim=ndim) else: - block = _int.make_block(block_arr, placement=placement) + block = _int.make_block(block_arr, placement=placement, ndim=ndim) return block @@ -1130,7 +1144,7 @@ def _table_to_blocks(options, block_table, categories, extension_columns): columns = block_table.column_names result = pa.lib.table_to_blocks(options, block_table, categories, list(extension_columns.keys())) - return [_reconstruct_block(item, columns, extension_columns) + return [_reconstruct_block(item, 2, columns, extension_columns) for item in result]