ARROW-6556: [Python] Handle future removal of pandas SparseDataFrame

https://issues.apache.org/jira/browse/ARROW-6556 The plan in pandas is to remove SparseDataFrame/SparseSeries in pandas 1.0. By making sure our code works without those classes, we can ensure that a pandas release does not break the pyarrow release that is, at that moment, the latest stable release. (This also makes it easier for me to develop on the master branches of both together.) The removal isn't merged yet in pandas, so we can also wait until that is done to merge this PR. I was just trying out some things in pandas and saw that the pyarrow feather tests were failing when those classes are removed. Closes #5377 from jorisvandenbossche/ARROW-6556-pandas-sparse and squashes the following commits: 705f0a7 <Joris Van den Bossche> ARROW-6556: Handle future removal of pandas SparseDataFrame Authored-by: Joris Van den Bossche <[email protected]> Signed-off-by: Wes McKinney <[email protected]>
apache · Sep 14, 2019 · 3e6f8d1 · 3e6f8d1
1 parent a1eb81b
commit 3e6f8d1
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 3 deletions.
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
@@ -82,7 +82,8 @@ def __init__(self, dest):
         self.writer.open(dest)
 
     def write(self, df):
-        if isinstance(df, _pandas_api.pd.SparseDataFrame):
+        if (_pandas_api.has_sparse
+                and isinstance(df, _pandas_api.pd.SparseDataFrame)):
             df = df.to_dense()
 
         if not df.columns.is_unique:

diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
@@ -32,6 +32,7 @@ cdef class _PandasAPIShim(object):
         object _data_frame, _index, _series, _categorical_type
         object _datetimetz_type, _extension_array
         object _array_like_types
+        bint has_sparse
 
     def __init__(self):
         self._tried_importing_pandas = False
@@ -81,6 +82,12 @@ cdef class _PandasAPIShim(object):
         self._datetimetz_type = DatetimeTZDtype
         self._have_pandas = True
 
+        try:
+            from pandas import SparseDataFrame
+            self.has_sparse = True
+        except ImportError:
+            self.has_sparse = False
+
     cdef inline _check_import(self, bint raise_=True):
         if self._tried_importing_pandas:
             if not self._have_pandas and raise_:

diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
@@ -163,7 +163,8 @@ def _register_custom_pandas_handlers(context):
     )
 
     def _serialize_pandas_dataframe(obj):
-        if isinstance(obj, pd.SparseDataFrame):
+        if (pdcompat._pandas_api.has_sparse
+                and isinstance(obj, pd.SparseDataFrame)):
             raise NotImplementedError(
                 sparse_type_error_msg.format('SparseDataFrame')
             )
@@ -174,7 +175,8 @@ def _deserialize_pandas_dataframe(data):
         return pdcompat.serialized_dict_to_dataframe(data)
 
     def _serialize_pandas_series(obj):
-        if isinstance(obj, pd.SparseSeries):
+        if (pdcompat._pandas_api.has_sparse
+                and isinstance(obj, pd.SparseSeries)):
             raise NotImplementedError(
                 sparse_type_error_msg.format('SparseSeries')
             )

diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
@@ -515,7 +515,11 @@ def test_filelike_objects(self):
         result = read_feather(buf)
         assert_frame_equal(result, df)
 
+    @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
     def test_sparse_dataframe(self):
+        if not hasattr(pd, 'SparseDataFrame'):
+            pytest.skip("version of pandas does not support SparseDataFrame")
         # GH #221
         data = {'A': [0, 1, 2],
                 'B': [1, 0, 1]}