Skip to content

Commit

Permalink
ARROW-6556: [Python] Handle future removal of pandas SparseDataFrame
Browse files Browse the repository at this point in the history
https://issues.apache.org/jira/browse/ARROW-6556

The plan in pandas is to remove SparseDataFrame and SparseSeries in pandas 1.0. By making sure our code already works without those classes, we can ensure that a new pandas release does not break whichever pyarrow release is the latest stable release at that moment.
(This also makes it easier for me to develop against the master branches of both projects together.)

The removal isn't merged in pandas yet, so we can also wait until that is done before merging this PR.
I was just trying out some things in pandas and saw that the pyarrow feather tests fail once those classes are removed.

Closes #5377 from jorisvandenbossche/ARROW-6556-pandas-sparse and squashes the following commits:

705f0a7 <Joris Van den Bossche> ARROW-6556:  Handle future removal of pandas SparseDataFrame

Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
  • Loading branch information
jorisvandenbossche authored and wesm committed Sep 14, 2019
1 parent a1eb81b commit 3e6f8d1
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 3 deletions.
3 changes: 2 additions & 1 deletion python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def __init__(self, dest):
self.writer.open(dest)

def write(self, df):
if isinstance(df, _pandas_api.pd.SparseDataFrame):
if (_pandas_api.has_sparse
and isinstance(df, _pandas_api.pd.SparseDataFrame)):
df = df.to_dense()

if not df.columns.is_unique:
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/pandas-shim.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ cdef class _PandasAPIShim(object):
object _data_frame, _index, _series, _categorical_type
object _datetimetz_type, _extension_array
object _array_like_types
bint has_sparse

def __init__(self):
self._tried_importing_pandas = False
Expand Down Expand Up @@ -81,6 +82,12 @@ cdef class _PandasAPIShim(object):
self._datetimetz_type = DatetimeTZDtype
self._have_pandas = True

try:
from pandas import SparseDataFrame
self.has_sparse = True
except ImportError:
self.has_sparse = False

cdef inline _check_import(self, bint raise_=True):
if self._tried_importing_pandas:
if not self._have_pandas and raise_:
Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def _register_custom_pandas_handlers(context):
)

def _serialize_pandas_dataframe(obj):
if isinstance(obj, pd.SparseDataFrame):
if (pdcompat._pandas_api.has_sparse
and isinstance(obj, pd.SparseDataFrame)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseDataFrame')
)
Expand All @@ -174,7 +175,8 @@ def _deserialize_pandas_dataframe(data):
return pdcompat.serialized_dict_to_dataframe(data)

def _serialize_pandas_series(obj):
if isinstance(obj, pd.SparseSeries):
if (pdcompat._pandas_api.has_sparse
and isinstance(obj, pd.SparseSeries)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseSeries')
)
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,11 @@ def test_filelike_objects(self):
result = read_feather(buf)
assert_frame_equal(result, df)

@pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
@pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_sparse_dataframe(self):
if not hasattr(pd, 'SparseDataFrame'):
pytest.skip("version of pandas does not support SparseDataFrame")
# GH #221
data = {'A': [0, 1, 2],
'B': [1, 0, 1]}
Expand Down

0 comments on commit 3e6f8d1

Please sign in to comment.