Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from pandas.util._decorators import (
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
can_hold_element,
Expand Down Expand Up @@ -63,6 +64,7 @@
)

from pandas.core import algorithms as algos
from pandas.core.arrays import ExtensionArray
import pandas.core.common as com
from pandas.core.construction import (
array as pd_array,
Expand Down Expand Up @@ -926,6 +928,7 @@ def __setitem__(self, key, value) -> None:
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)

self._maybe_warn_non_casting_setitem(key, value)
check_dict_or_set_indexers(key)
if isinstance(key, tuple):
key = (list(x) if is_iterator(x) else x for x in key)
Expand All @@ -941,6 +944,47 @@ def __setitem__(self, key, value) -> None:
)
iloc._setitem_with_indexer(indexer, value, self.name)

@final
def _maybe_warn_non_casting_setitem(self, key, value) -> None:
    """
    Warn when a ``df.loc[:, col] = values`` style assignment has mismatched dtypes.

    A ``UserWarning`` is emitted only when all of the following hold:

    * the indexed object is 2-dimensional;
    * ``key`` is a tuple with more than one element whose first element is
      the full slice ``slice(None)``;
    * ``key[1]`` is hashable, is not a slice, is not a tuple containing a
      slice, and is contained in ``self.obj.columns``;
    * ``self.obj[key[1]]`` is a Series and ``value`` is a Series, Index,
      ExtensionArray or ndarray;
    * the dtype of the selected column differs from ``value.dtype``.

    In every other situation the method returns without doing anything.

    Parameters
    ----------
    key : object
        The indexer passed to ``__setitem__``.
    value : object
        The value being assigned.
    """
    # GH#52593 many users got confused by this, so issue a warning
    if self.ndim != 2 or not isinstance(key, tuple) or len(key) <= 1:
        return

    row_key = key[0]
    if not isinstance(row_key, slice) or not (row_key == slice(None)):
        return

    # From here on this is a `df.loc[:, foo] = bar` call
    col_key = key[1]
    if not is_hashable(col_key) or isinstance(col_key, slice):
        return
    if isinstance(col_key, tuple) and any(
        isinstance(part, slice) for part in col_key
    ):
        return
    if col_key not in self.obj.columns:
        return

    existing = self.obj[col_key]
    # Series check necessary in case of non-unique columns
    if not isinstance(existing, ABCSeries):
        return
    if not isinstance(value, (ABCSeries, Index, ExtensionArray, np.ndarray)):
        return

    if existing.dtype != value.dtype:
        message = (
            "Setting `df.loc[:, col] = values` does *not* change "
            "the dtype of `df[col]`. It writes the entries from "
            "`values` into the existing array behind `df[col]`. "
            "To swap out the old array for the new one, use "
            "`df[col] = values` instead."
        )
        warnings.warn(message, UserWarning, stacklevel=find_stack_level())
    # TODO: the checks above handle the most common cases, but miss
    #  a) obj.columns is MultiIndex
    #  b) non-unique columns
    #  c) df.loc[:, [col]] = ...

def _validate_key(self, key, axis: AxisInt) -> None:
"""
Ensure that key is valid for current indexer.
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,10 @@ def test_subset_set_column_with_loc(backend, dtype):
df_orig = df.copy()
subset = df[1:3]

subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
err = UserWarning if backend[0] != "numpy" else None
with tm.assert_produces_warning(err, match=msg):
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")

subset._mgr._verify_integrity()
expected = DataFrame(
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,9 @@ def test_setitem_frame_midx_columns(self):
def test_loc_setitem_ea_dtype(self):
# GH#55604
df = DataFrame({"a": np.array([10], dtype="i8")})
df.loc[:, "a"] = Series([11], dtype="Int64")
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, "a"] = Series([11], dtype="Int64")
expected = DataFrame({"a": np.array([11], dtype="i8")})
tm.assert_frame_equal(df, expected)

Expand Down
14 changes: 11 additions & 3 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
df = frame.copy()
orig_vals = df.values

indexer_li(df)[key, 0] = cat
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
err = None
if isinstance(key, slice) and key == slice(None):
err = UserWarning
with tm.assert_produces_warning(err, match=msg):
indexer_li(df)[key, 0] = cat

expected = DataFrame({0: cat}).astype(object)
assert np.shares_memory(df[0].values, orig_vals)
Expand All @@ -103,7 +108,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key):
# we retain the object dtype.
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
df = frame.copy()
indexer_li(df)[key, 0] = cat
with tm.assert_produces_warning(err, match=msg):
indexer_li(df)[key, 0] = cat
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -1521,10 +1527,12 @@ def test_iloc_setitem_pure_position_based(self):
def test_iloc_nullable_int64_size_1_nan(self):
# GH 31861
result = DataFrame({"a": ["test"], "b": [np.nan]})
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"

ser = Series([NA], name="b", dtype="Int64")
with pytest.raises(TypeError, match="Invalid value"):
result.loc[:, "b"] = ser
with tm.assert_produces_warning(UserWarning, match=msg):
result.loc[:, "b"] = ser

def test_iloc_arrow_extension_array(self):
# GH#61311
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,9 @@ def test_astype_assignment(self, using_infer_string):

# GH5702 (loc)
df = df_orig.copy()
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64)
expected = DataFrame(
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
Expand All @@ -570,12 +572,14 @@ def test_astype_assignment_full_replacements(self):

# With the enforcement of GH#45333 in 2.0, this assignment occurs inplace,
# so float64 is retained
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
df.iloc[:, 0] = df["A"].astype(np.int64)
expected = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
tm.assert_frame_equal(df, expected)

df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]})
df.loc[:, "A"] = df["A"].astype(np.int64)
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, "A"] = df["A"].astype(np.int64)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("indexer", [tm.getitem, tm.loc])
Expand Down
24 changes: 16 additions & 8 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,11 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
# GH 6149
# coerce similarly for setitem and loc when rows have a null-slice
df = frame_for_consistency.copy()
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
err = UserWarning if isinstance(val, np.ndarray) else None
with pytest.raises(TypeError, match="Invalid value"):
df.loc[:, "date"] = val
with tm.assert_produces_warning(err, match=msg):
df.loc[:, "date"] = val

def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
# GH 6149
Expand Down Expand Up @@ -646,18 +649,21 @@ def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
]
df = DataFrame(values, index=mi, columns=cols)

msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
ctx = contextlib.nullcontext()
if using_infer_string:
ctx = pytest.raises(TypeError, match="Invalid value")

with ctx:
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
df.loc[:, ("Respondent", "StartDate")]
)
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
df.loc[:, ("Respondent", "StartDate")]
)
with ctx:
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
df.loc[:, ("Respondent", "EndDate")]
)
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
df.loc[:, ("Respondent", "EndDate")]
)

if using_infer_string:
# infer-objects won't infer stuff anymore
Expand Down Expand Up @@ -1426,7 +1432,9 @@ def test_loc_setitem_single_row_categorical(self, using_infer_string):

# pre-2.0 this swapped in a new array, in 2.0 it operates inplace,
# consistent with non-split-path
df.loc[:, "Alpha"] = categories
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, "Alpha"] = categories

result = df["Alpha"]
expected = Series(categories, index=df.index, name="Alpha").astype(
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,9 @@ def test_partial_setting_frame(self):
df["B"] = df["B"].astype(np.float64)
# as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
# setting inplace
df.loc[:, "B"] = df.loc[:, "A"]
msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change"
with tm.assert_produces_warning(UserWarning, match=msg):
df.loc[:, "B"] = df.loc[:, "A"]
tm.assert_frame_equal(df, expected)

# single dtype frame, partial setting
Expand Down
Loading