Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ Deprecations
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2797,7 +2797,7 @@ def __getitem__(self, key):
if getattr(indexer, "dtype", None) == bool:
indexer = np.where(indexer)[0]

data = self.take(indexer, axis=1)
data = self._take_with_is_copy(indexer, axis=1)

if is_single_key:
# What does looking for a single key in a non-unique index return?
Expand Down Expand Up @@ -2830,7 +2830,7 @@ def _getitem_bool_array(self, key):
# be reindexed to match DataFrame rows
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
return self.take(indexer, axis=0)
return self._take_with_is_copy(indexer, axis=0)

def _getitem_multilevel(self, key):
# self.columns is a MultiIndex
Expand Down
35 changes: 20 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3287,12 +3287,10 @@ class max_speed
if is_copy is not None:
warnings.warn(
"is_copy is deprecated and will be removed in a future version. "
"take will always return a copy in the future.",
"'take' always returns a copy, so there is no need to specify this.",
FutureWarning,
stacklevel=2,
)
else:
is_copy = True

nv.validate_take(tuple(), kwargs)

Expand All @@ -3301,13 +3299,21 @@ class max_speed
new_data = self._data.take(
indices, axis=self._get_block_manager_axis(axis), verify=True
)
result = self._constructor(new_data).__finalize__(self)
return self._constructor(new_data).__finalize__(self)

# Maybe set copy if we didn't actually change the index.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if its relevant, but DTI.take sometimes returns a view

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case the indices can represent a slice, right?
Personally I don't think we should make that optimization, as IMO it is much clearer for take to always be a copy, but I suppose for Index which is supposed to be immutable it might matter less.

if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
def _take_with_is_copy(
self: FrameOrSeries, indices, axis=0, **kwargs
) -> FrameOrSeries:
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning).

See the docstring of `take` for full explanation of the parameters.
"""
result = self.take(indices=indices, axis=axis, **kwargs)
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
return result

def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
Expand Down Expand Up @@ -3437,9 +3443,9 @@ class animal locomotion
if isinstance(loc, np.ndarray):
if loc.dtype == np.bool_:
(inds,) = loc.nonzero()
return self.take(inds, axis=axis)
return self._take_with_is_copy(inds, axis=axis)
else:
return self.take(loc, axis=axis)
return self._take_with_is_copy(loc, axis=axis)

if not is_scalar(loc):
new_index = self.index[loc]
Expand Down Expand Up @@ -3496,7 +3502,7 @@ def _iget_item_cache(self, item):
if ax.is_unique:
lower = self._get_item_cache(ax[item])
else:
lower = self.take(item, axis=self._info_axis_number)
lower = self._take_with_is_copy(item, axis=self._info_axis_number)
return lower

def _box_item_values(self, key, values):
Expand Down Expand Up @@ -6876,8 +6882,7 @@ def asof(self, where, subset=None):

# mask the missing
missing = locs == -1
d = self.take(locs)
data = d.copy()
data = self.take(locs)
data.index = where
data.loc[missing] = np.nan
return data if is_list else data.iloc[-1]
Expand Down Expand Up @@ -7423,7 +7428,7 @@ def at_time(
except AttributeError:
raise TypeError("Index must be DatetimeIndex")

return self.take(indexer, axis=axis)
return self._take_with_is_copy(indexer, axis=axis)

def between_time(
self: FrameOrSeries,
Expand Down Expand Up @@ -7505,7 +7510,7 @@ def between_time(
except AttributeError:
raise TypeError("Index must be DatetimeIndex")

return self.take(indexer, axis=axis)
return self._take_with_is_copy(indexer, axis=axis)

def resample(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def get_group(self, name, obj=None):
if not len(inds):
raise KeyError(name)

return obj.take(inds, axis=self.axis)
return obj._take_with_is_copy(inds, axis=self.axis)

def __iter__(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ def get_value(self, series, key):

if isinstance(key, time):
locs = self.indexer_at_time(key)
return series.take(locs)
return series._take_with_set_copy(locs)

if isinstance(key, str):
try:
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,7 +1509,7 @@ def _getitem_iterable(self, key, axis: int):
# A boolean indexer
key = check_bool_indexer(labels, key)
(inds,) = key.nonzero()
return self.obj.take(inds, axis=axis)
return self.obj._take_with_is_copy(inds, axis=axis)
else:
# A collection of keys
keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
Expand Down Expand Up @@ -1701,7 +1701,7 @@ def _getbool_axis(self, key, axis: int):
labels = self.obj._get_axis(axis)
key = check_bool_indexer(labels, key)
inds = key.nonzero()[0]
return self.obj.take(inds, axis=axis)
return self.obj._take_with_is_copy(inds, axis=axis)

def _get_slice_axis(self, slice_obj: slice, axis: int):
"""
Expand All @@ -1722,7 +1722,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int):
else:
# DatetimeIndex overrides Index.slice_indexer and may
# return a DatetimeIndex instead of a slice object.
return self.obj.take(indexer, axis=axis)
return self.obj._take_with_is_copy(indexer, axis=axis)


@Appender(IndexingMixin.loc.__doc__)
Expand Down Expand Up @@ -2017,7 +2017,7 @@ def _get_list_axis(self, key, axis: int):
`axis` can only be zero.
"""
try:
return self.obj.take(key, axis=axis)
return self.obj._take_with_is_copy(key, axis=axis)
except IndexError:
# re-raise with different error message
raise IndexError("positional indexers are out-of-bounds")
Expand Down
25 changes: 18 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,14 @@ def axes(self) -> List[Index]:
# Indexing Methods

@Appender(generic.NDFrame.take.__doc__)
def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
if is_copy is not None:
warnings.warn(
"is_copy is deprecated and will be removed in a future version. "
"'take' always returns a copy, so there is no need to specify this.",
FutureWarning,
stacklevel=2,
)
nv.validate_take(tuple(), kwargs)

indices = ensure_platform_int(indices)
Expand All @@ -771,16 +778,20 @@ def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
kwargs = {}
new_values = self._values.take(indices, **kwargs)

result = self._constructor(
return self._constructor(
new_values, index=new_index, fastpath=True
).__finalize__(self)

# Maybe set copy if we didn't actually change the index.
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
def _take_with_is_copy(self, indices, axis=0, **kwargs):
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning). For Series this does the same
as the public take (it never sets `_is_copy`).

return result
See the docstring of `take` for full explanation of the parameters.
"""
return self.take(indices=indices, axis=axis, **kwargs)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am including a Series._take_with_is_copy to override the generic one. This is because the defaults for the old is_copy kwargs were different for Series (False) and DataFrame (True).

I am not fully sure if the code paths that use _take_with_is_copy actually can have a Series calling it, but included this to be sure.


def _ixs(self, i: int, axis: int = 0):
"""
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,18 +817,23 @@ def test_take_invalid_kwargs(self):
with pytest.raises(ValueError, match=msg):
obj.take(indices, mode="clip")

def test_depr_take_kwarg_is_copy(self):
@pytest.mark.parametrize("is_copy", [True, False])
def test_depr_take_kwarg_is_copy(self, is_copy):
# GH 27357
df = DataFrame({"A": [1, 2, 3]})
msg = (
"is_copy is deprecated and will be removed in a future version. "
"take will always return a copy in the future."
"'take' always returns a copy, so there is no need to specify this."
)
with tm.assert_produces_warning(FutureWarning) as w:
df.take([0, 1], is_copy=True)
df.take([0, 1], is_copy=is_copy)

assert w[0].message.args[0] == msg

s = Series([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
s.take([0, 1], is_copy=is_copy)

def test_equals(self):
s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
s2 = s1.copy()
Expand Down