Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ Deprecations
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`)
- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`)
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2800,7 +2800,7 @@ def __getitem__(self, key):
if getattr(indexer, "dtype", None) == bool:
indexer = np.where(indexer)[0]

data = self.take(indexer, axis=1)
data = self._take_with_is_copy(indexer, axis=1)

if is_single_key:
# What does looking for a single key in a non-unique index return?
Expand Down Expand Up @@ -2833,7 +2833,7 @@ def _getitem_bool_array(self, key):
# be reindexed to match DataFrame rows
key = check_bool_indexer(self.index, key)
indexer = key.nonzero()[0]
return self.take(indexer, axis=0)
return self._take_with_is_copy(indexer, axis=0)

def _getitem_multilevel(self, key):
# self.columns is a MultiIndex
Expand Down
35 changes: 20 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3276,12 +3276,10 @@ class max_speed
if is_copy is not None:
warnings.warn(
"is_copy is deprecated and will be removed in a future version. "
"take will always return a copy in the future.",
"'take' always returns a copy, so there is no need to specify this.",
FutureWarning,
stacklevel=2,
)
else:
is_copy = True

nv.validate_take(tuple(), kwargs)

Expand All @@ -3290,13 +3288,21 @@ class max_speed
new_data = self._data.take(
indices, axis=self._get_block_manager_axis(axis), verify=True
)
result = self._constructor(new_data).__finalize__(self)
return self._constructor(new_data).__finalize__(self)

# Maybe set copy if we didn't actually change the index.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if its relevant, but DTI.take sometimes returns a view

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the case the indices can represent a slice, right?
Personally I don't think we should make that optimization, as IMO it is much clearer for take to always be a copy, but I suppose for Index which is supposed to be immutable it might matter less.

if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
def _take_with_is_copy(
self: FrameOrSeries, indices, axis=0, **kwargs
) -> FrameOrSeries:
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning).

See the docstring of `take` for full explanation of the parameters.
"""
result = self.take(indices=indices, axis=axis, **kwargs)
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
return result

def xs(self, key, axis=0, level=None, drop_level: bool_t = True):
Expand Down Expand Up @@ -3426,9 +3432,9 @@ class animal locomotion
if isinstance(loc, np.ndarray):
if loc.dtype == np.bool_:
(inds,) = loc.nonzero()
return self.take(inds, axis=axis)
return self._take_with_is_copy(inds, axis=axis)
else:
return self.take(loc, axis=axis)
return self._take_with_is_copy(loc, axis=axis)

if not is_scalar(loc):
new_index = self.index[loc]
Expand Down Expand Up @@ -3485,7 +3491,7 @@ def _iget_item_cache(self, item):
if ax.is_unique:
lower = self._get_item_cache(ax[item])
else:
lower = self.take(item, axis=self._info_axis_number)
lower = self._take_with_is_copy(item, axis=self._info_axis_number)
return lower

def _box_item_values(self, key, values):
Expand Down Expand Up @@ -7003,8 +7009,7 @@ def asof(self, where, subset=None):

# mask the missing
missing = locs == -1
d = self.take(locs)
data = d.copy()
data = self.take(locs)
data.index = where
data.loc[missing] = np.nan
return data if is_list else data.iloc[-1]
Expand Down Expand Up @@ -7550,7 +7555,7 @@ def at_time(
except AttributeError:
raise TypeError("Index must be DatetimeIndex")

return self.take(indexer, axis=axis)
return self._take_with_is_copy(indexer, axis=axis)

def between_time(
self: FrameOrSeries,
Expand Down Expand Up @@ -7632,7 +7637,7 @@ def between_time(
except AttributeError:
raise TypeError("Index must be DatetimeIndex")

return self.take(indexer, axis=axis)
return self._take_with_is_copy(indexer, axis=axis)

def resample(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def get_group(self, name, obj=None):
if not len(inds):
raise KeyError(name)

return obj.take(inds, axis=self.axis)
return obj._take_with_is_copy(inds, axis=self.axis)

def __iter__(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ def get_value(self, series, key):

if isinstance(key, time):
locs = self.indexer_at_time(key)
return series.take(locs)
return series._take_with_is_copy(locs)

if isinstance(key, str):
try:
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,7 +1509,7 @@ def _getitem_iterable(self, key, axis: int):
# A boolean indexer
key = check_bool_indexer(labels, key)
(inds,) = key.nonzero()
return self.obj.take(inds, axis=axis)
return self.obj._take_with_is_copy(inds, axis=axis)
else:
# A collection of keys
keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
Expand Down Expand Up @@ -1704,7 +1704,7 @@ def _getbool_axis(self, key, axis: int):
labels = self.obj._get_axis(axis)
key = check_bool_indexer(labels, key)
inds = key.nonzero()[0]
return self.obj.take(inds, axis=axis)
return self.obj._take_with_is_copy(inds, axis=axis)

def _get_slice_axis(self, slice_obj: slice, axis: int):
"""
Expand All @@ -1725,7 +1725,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int):
else:
# DatetimeIndex overrides Index.slice_indexer and may
# return a DatetimeIndex instead of a slice object.
return self.obj.take(indexer, axis=axis)
return self.obj._take_with_is_copy(indexer, axis=axis)


@Appender(IndexingMixin.loc.__doc__)
Expand Down Expand Up @@ -2020,7 +2020,7 @@ def _get_list_axis(self, key, axis: int):
`axis` can only be zero.
"""
try:
return self.obj.take(key, axis=axis)
return self.obj._take_with_is_copy(key, axis=axis)
except IndexError:
# re-raise with different error message
raise IndexError("positional indexers are out-of-bounds")
Expand Down
25 changes: 18 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,14 @@ def axes(self) -> List[Index]:
# Indexing Methods

@Appender(generic.NDFrame.take.__doc__)
def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
if is_copy is not None:
warnings.warn(
"is_copy is deprecated and will be removed in a future version. "
"'take' always returns a copy, so there is no need to specify this.",
FutureWarning,
stacklevel=2,
)
nv.validate_take(tuple(), kwargs)

indices = ensure_platform_int(indices)
Expand All @@ -809,16 +816,20 @@ def take(self, indices, axis=0, is_copy=False, **kwargs) -> "Series":
kwargs = {}
new_values = self._values.take(indices, **kwargs)

result = self._constructor(
return self._constructor(
new_values, index=new_index, fastpath=True
).__finalize__(self)

# Maybe set copy if we didn't actually change the index.
if is_copy:
if not result._get_axis(axis).equals(self._get_axis(axis)):
result._set_is_copy(self)
def _take_with_is_copy(self, indices, axis=0, **kwargs):
"""
Internal version of the `take` method that sets the `_is_copy`
attribute to keep track of the parent dataframe (using in indexing
for the SettingWithCopyWarning). For Series this does the same
as the public take (it never sets `_is_copy`).

return result
See the docstring of `take` for full explanation of the parameters.
"""
return self.take(indices=indices, axis=axis, **kwargs)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am including a Series._take_with_is_copy to override the generic one. This is because the defaults for the old is_copy kwargs were different for Series (False) and DataFrame (True).

I am not fully sure if the code paths that use _take_with_is_copy actually can have a Series calling it, but included this to be sure.


def _ixs(self, i: int, axis: int = 0):
"""
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,16 @@ def test_time_zone_aware_index(self, stamp, expected):

result = df.asof(stamp)
tm.assert_series_equal(result, expected)

def test_is_copy(self, date_range_frame):
# GH-27357, GH-30784: ensure the result of asof is an actual copy and
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
df = date_range_frame
N = 50
df.loc[15:30, "A"] = np.nan
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

result = df.asof(dates)

with tm.assert_produces_warning(None):
result["C"] = 1
20 changes: 17 additions & 3 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,15 @@ def test_sample_upsampling_without_replacement(self):
with pytest.raises(ValueError, match=msg):
df.sample(frac=2, replace=False)

def test_sample_is_copy(self):
# GH-27357, GH-30784: ensure the result of sample is an actual copy and
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])
df2 = df.sample(3)

with tm.assert_produces_warning(None):
df2["d"] = 1

def test_size_compat(self):
# GH8846
# size property should be defined
Expand Down Expand Up @@ -817,18 +826,23 @@ def test_take_invalid_kwargs(self):
with pytest.raises(ValueError, match=msg):
obj.take(indices, mode="clip")

def test_depr_take_kwarg_is_copy(self):
@pytest.mark.parametrize("is_copy", [True, False])
def test_depr_take_kwarg_is_copy(self, is_copy):
# GH 27357
df = DataFrame({"A": [1, 2, 3]})
msg = (
"is_copy is deprecated and will be removed in a future version. "
"take will always return a copy in the future."
"'take' always returns a copy, so there is no need to specify this."
)
with tm.assert_produces_warning(FutureWarning) as w:
df.take([0, 1], is_copy=True)
df.take([0, 1], is_copy=is_copy)

assert w[0].message.args[0] == msg

s = Series([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
s.take([0, 1], is_copy=is_copy)

def test_equals(self):
s1 = pd.Series([1, 2, 3], index=[0, 2, 1])
s2 = s1.copy()
Expand Down