Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,7 @@ Deprecations
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
- Deprecated passing arguments as positional in :meth:`DataFrame.drop_duplicates` (except for ``subset``), :meth:`Series.drop_duplicates` and :meth:`Index.drop_duplicates` (:issue:`41485`)
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)

Expand Down
1 change: 1 addition & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6005,6 +6005,7 @@ def dropna(
else:
return result

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"])
def drop_duplicates(
self,
subset: Hashable | Sequence[Hashable] | None = None,
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from pandas.util._decorators import (
Appender,
cache_readonly,
deprecate_nonkeyword_arguments,
doc,
)

Expand Down Expand Up @@ -2633,6 +2634,7 @@ def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
result = super().unique()
return self._shallow_copy(result)

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
@final
def drop_duplicates(self: _IndexT, keep: str_t | bool = "first") -> _IndexT:
"""
Expand Down
1 change: 1 addition & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2057,6 +2057,7 @@ def drop_duplicates(self, *, inplace: Literal[True]) -> None:
def drop_duplicates(self, keep=..., inplace: bool = ...) -> Series | None:
...

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def drop_duplicates(self, keep="first", inplace=False) -> Series | None:
"""
Return Series with duplicate values removed.
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,21 @@ def test_drop_duplicates_non_boolean_ignore_index(arg):
msg = '^For argument "ignore_index" expected type bool, received type .*.$'
with pytest.raises(ValueError, match=msg):
df.drop_duplicates(ignore_index=arg)


def test_drop_duplicates_pos_args_deprecation():
    # GH#41485
    # Passing an argument positionally after ``subset`` must emit a
    # FutureWarning while the call still returns the de-duplicated frame.
    df = DataFrame({"a": [1, 1, 2], "b": [1, 1, 3], "c": [1, 1, 3]})
    msg = (
        "In a future version of pandas all arguments of "
        "DataFrame.drop_duplicates except for the argument 'subset' "
        "will be keyword-only"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        # "last" is deliberately positional: it is what triggers the warning.
        result = df.drop_duplicates(["b", "c"], "last")
    expected = DataFrame({"a": [1, 2], "b": [1, 3], "c": [1, 3]}, index=[1, 2])
    # Actual value first, matching the other assertions in these test modules.
    tm.assert_frame_equal(result, expected)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove all the extra newlines here; this test could read as a single paragraph.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made this change to all my tests in fbf70a2, they now all read as one paragraph.

17 changes: 17 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,20 @@ def test_duplicated_drop_duplicates():
assert duplicated.dtype == bool
expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)


def test_multi_drop_duplicates_pos_args_deprecation():
idx = MultiIndex.from_arrays([[1, 2, 3, 1], [1, 2, 3, 1]])

msg = (
"In a future version of pandas all arguments of "
"Index.drop_duplicates will be keyword-only"
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning message shows `Index`. To get it to show `MultiIndex`, you'll need to define `drop_duplicates` in the `MultiIndex` class and call `super().drop_duplicates` inside it; see `interpolate` for an example of this.

Copy link
Contributor Author

@jmholzer jmholzer May 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this change in 2cb482f.

I will have to do the same for #41551, as the test for MultiIndex currently has the same problem.


with tm.assert_produces_warning(FutureWarning, match=msg):
idx.drop_duplicates("last")
result = idx.drop_duplicates("last")

expected = MultiIndex.from_arrays([[2, 3, 1], [2, 3, 1]])

tm.assert_index_equal(expected, result)
17 changes: 17 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,3 +1738,20 @@ def test_construct_from_memoryview(klass, extra_kwargs):
result = klass(memoryview(np.arange(2000, 2005)), **extra_kwargs)
expected = klass(range(2000, 2005), **extra_kwargs)
tm.assert_index_equal(result, expected)


def test_drop_duplicates_pos_args_deprecation():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you also put the issue number here?

Copy link
Contributor Author

@jmholzer jmholzer May 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this change in 2cb482f.

idx = Index([1, 2, 3, 1])

msg = (
"In a future version of pandas all arguments of "
"Index.drop_duplicates will be keyword-only"
)

with tm.assert_produces_warning(FutureWarning, match=msg):
idx.drop_duplicates("last")
result = idx.drop_duplicates("last")

expected = Index([2, 3, 1])

tm.assert_index_equal(expected, result)
17 changes: 17 additions & 0 deletions pandas/tests/series/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,20 @@ def test_drop_duplicates_categorical_bool(self, ordered):
return_value = sc.drop_duplicates(keep=False, inplace=True)
assert return_value is None
tm.assert_series_equal(sc, tc[~expected])


def test_drop_duplicates_pos_args_deprecation():
    # GH#41485
    # Passing an argument positionally must emit a FutureWarning while the
    # call still returns the de-duplicated Series.
    ser = Series(["a", "b", "c", "b"])
    msg = (
        "In a future version of pandas all arguments of "
        "Series.drop_duplicates will be keyword-only"
    )
    with tm.assert_produces_warning(FutureWarning, match=msg):
        # "last" is deliberately positional: it is what triggers the warning.
        result = ser.drop_duplicates("last")
    expected = Series(["a", "c", "b"], index=[0, 2, 3])
    # Actual value first, matching the other assertions in these test modules.
    tm.assert_series_equal(result, expected)