Skip to content

Commit

Permalink
ARROW-6556: [Python] Handle future removal of pandas SparseDataFrame
Browse files: browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Sep 13, 2019
1 parent 5bc0fda commit 705f0a7
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 3 deletions.
3 changes: 2 additions & 1 deletion python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def __init__(self, dest):
self.writer.open(dest)

def write(self, df):
if isinstance(df, _pandas_api.pd.SparseDataFrame):
if (_pandas_api.has_sparse
and isinstance(df, _pandas_api.pd.SparseDataFrame)):
df = df.to_dense()

if not df.columns.is_unique:
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/pandas-shim.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ cdef class _PandasAPIShim(object):
object _data_frame, _index, _series, _categorical_type
object _datetimetz_type, _extension_array
object _array_like_types
bint has_sparse

def __init__(self):
self._tried_importing_pandas = False
Expand Down Expand Up @@ -81,6 +82,12 @@ cdef class _PandasAPIShim(object):
self._datetimetz_type = DatetimeTZDtype
self._have_pandas = True

try:
from pandas import SparseDataFrame
self.has_sparse = True
except ImportError:
self.has_sparse = False

cdef inline _check_import(self, bint raise_=True):
if self._tried_importing_pandas:
if not self._have_pandas and raise_:
Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def _register_custom_pandas_handlers(context):
)

def _serialize_pandas_dataframe(obj):
if isinstance(obj, pd.SparseDataFrame):
if (pdcompat._pandas_api.has_sparse
and isinstance(obj, pd.SparseDataFrame)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseDataFrame')
)
Expand All @@ -174,7 +175,8 @@ def _deserialize_pandas_dataframe(data):
return pdcompat.serialized_dict_to_dataframe(data)

def _serialize_pandas_series(obj):
if isinstance(obj, pd.SparseSeries):
if (pdcompat._pandas_api.has_sparse
and isinstance(obj, pd.SparseSeries)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseSeries')
)
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,11 @@ def test_filelike_objects(self):
result = read_feather(buf)
assert_frame_equal(result, df)

@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_sparse_dataframe(self):
if not hasattr(pd, 'SparseDataFrame'):
pytest.skip("version of pandas does not support SparseDataFrame")
# GH #221
data = {'A': [0, 1, 2],
'B': [1, 0, 1]}
Expand Down

0 comments on commit 705f0a7

Please sign in to comment.