Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-6556: [Python] Handle future removal of pandas SparseDataFrame #5377

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/pyarrow/feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def __init__(self, dest):
self.writer.open(dest)

def write(self, df):
- if isinstance(df, _pandas_api.pd.SparseDataFrame):
+ if (_pandas_api.has_sparse
+ and isinstance(df, _pandas_api.pd.SparseDataFrame)):
df = df.to_dense()

if not df.columns.is_unique:
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/pandas-shim.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ cdef class _PandasAPIShim(object):
object _data_frame, _index, _series, _categorical_type
object _datetimetz_type, _extension_array
object _array_like_types
+ bint has_sparse

def __init__(self):
self._tried_importing_pandas = False
Expand Down Expand Up @@ -81,6 +82,12 @@ cdef class _PandasAPIShim(object):
self._datetimetz_type = DatetimeTZDtype
self._have_pandas = True

+ try:
+ from pandas import SparseDataFrame
+ self.has_sparse = True
+ except ImportError:
+ self.has_sparse = False

cdef inline _check_import(self, bint raise_=True):
if self._tried_importing_pandas:
if not self._have_pandas and raise_:
Expand Down
6 changes: 4 additions & 2 deletions python/pyarrow/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ def _register_custom_pandas_handlers(context):
)

def _serialize_pandas_dataframe(obj):
- if isinstance(obj, pd.SparseDataFrame):
+ if (pdcompat._pandas_api.has_sparse
+ and isinstance(obj, pd.SparseDataFrame)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseDataFrame')
)
Expand All @@ -174,7 +175,8 @@ def _deserialize_pandas_dataframe(data):
return pdcompat.serialized_dict_to_dataframe(data)

def _serialize_pandas_series(obj):
- if isinstance(obj, pd.SparseSeries):
+ if (pdcompat._pandas_api.has_sparse
+ and isinstance(obj, pd.SparseSeries)):
raise NotImplementedError(
sparse_type_error_msg.format('SparseSeries')
)
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,11 @@ def test_filelike_objects(self):
result = read_feather(buf)
assert_frame_equal(result, df)

+ @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning")
+ @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning")
def test_sparse_dataframe(self):
+ if not hasattr(pd, 'SparseDataFrame'):
+ pytest.skip("version of pandas does not support SparseDataFrame")
# GH #221
data = {'A': [0, 1, 2],
'B': [1, 0, 1]}
Expand Down