diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3f9749f1f7a99..f410cdff6d3de 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -31,6 +31,7 @@ from pandas.util._decorators import ( doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( can_hold_element, @@ -63,6 +64,7 @@ ) from pandas.core import algorithms as algos +from pandas.core.arrays import ExtensionArray import pandas.core.common as com from pandas.core.construction import ( array as pd_array, @@ -926,6 +928,7 @@ def __setitem__(self, key, value) -> None: _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) + self._maybe_warn_non_casting_setitem(key, value) check_dict_or_set_indexers(key) if isinstance(key, tuple): key = (list(x) if is_iterator(x) else x for x in key) @@ -941,6 +944,47 @@ def __setitem__(self, key, value) -> None: ) iloc._setitem_with_indexer(indexer, value, self.name) + @final + def _maybe_warn_non_casting_setitem(self, key, value) -> None: + # GH#52593 many users got confused by this, so issue a warning + + if ( + self.ndim == 2 + and isinstance(key, tuple) + and len(key) > 1 + and isinstance(key[0], slice) + and key[0] == slice(None) + ): + # This is a `df.loc[:, foo] = bar` call + if ( + is_hashable(key[1]) + and not isinstance(key[1], slice) + and not ( + isinstance(key[1], tuple) + and any(isinstance(x, slice) for x in key[1]) + ) + and key[1] in self.obj.columns + ): + obj = self.obj[key[1]] + if isinstance(obj, ABCSeries) and isinstance( + value, (ABCSeries, Index, ExtensionArray, np.ndarray) + ): + # check necessary in case of non-unique columns + if obj.dtype != value.dtype: + warnings.warn( + "Setting `df.loc[:, col] = values` does *not* change " + "the dtype of `df[col]`. It writes the entries from " + "`values` into the existing array behind `df[col]`. " + "To swap out the old array for the new one, use " + "`df[col] = values` instead.", + UserWarning, + stacklevel=find_stack_level(), + ) + # TODO: the checks above handle the most common cases, but miss + # a) obj.columns is MultiIndex + # b) non-unique columns + # c) df.loc[:, [col]] = ... + def _validate_key(self, key, axis: AxisInt) -> None: """ Ensure that key is valid for current indexer. diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 37a21e1098e78..7d4b4f6562e23 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -308,7 +308,10 @@ def test_subset_set_column_with_loc(backend, dtype): df_orig = df.copy() subset = df[1:3] - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + err = UserWarning if backend[0] != "numpy" else None + with tm.assert_produces_warning(err, match=msg): + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") subset._mgr._verify_integrity() expected = DataFrame( diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index c0fead4889932..37e927bf3c3ba 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -792,7 +792,9 @@ def test_setitem_frame_midx_columns(self): def test_loc_setitem_ea_dtype(self): # GH#55604 df = DataFrame({"a": np.array([10], dtype="i8")}) - df.loc[:, "a"] = Series([11], dtype="Int64") + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, "a"] = Series([11], dtype="Int64") expected = DataFrame({"a": np.array([11], dtype="i8")}) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index ddb58ecbfa6f3..623a47383ff30 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -87,7 +87,12 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key): df = frame.copy() orig_vals = df.values - indexer_li(df)[key, 0] = cat + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + err = None + if isinstance(key, slice) and key == slice(None): + err = UserWarning + with tm.assert_produces_warning(err, match=msg): + indexer_li(df)[key, 0] = cat expected = DataFrame({0: cat}).astype(object) assert np.shares_memory(df[0].values, orig_vals) @@ -103,7 +108,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key): # we retain the object dtype. frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) df = frame.copy() - indexer_li(df)[key, 0] = cat + with tm.assert_produces_warning(err, match=msg): + indexer_li(df)[key, 0] = cat expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)}) tm.assert_frame_equal(df, expected) @@ -1521,10 +1527,12 @@ def test_iloc_setitem_pure_position_based(self): def test_iloc_nullable_int64_size_1_nan(self): # GH 31861 result = DataFrame({"a": ["test"], "b": [np.nan]}) + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" ser = Series([NA], name="b", dtype="Int64") with pytest.raises(TypeError, match="Invalid value"): - result.loc[:, "b"] = ser + with tm.assert_produces_warning(UserWarning, match=msg): + result.loc[:, "b"] = ser def test_iloc_arrow_extension_array(self): # GH#61311 diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 113eb6c2b2c31..1a611dcf54f20 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -548,7 +548,9 @@ def test_astype_assignment(self, using_infer_string): # GH5702 (loc) df = df_orig.copy() - df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) expected = DataFrame( [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) @@ -570,12 +572,14 @@ def test_astype_assignment_full_replacements(self): # With the enforcement of GH#45333 in 2.0, this assignment occurs inplace, # so float64 is retained + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" df.iloc[:, 0] = df["A"].astype(np.int64) expected = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) tm.assert_frame_equal(df, expected) df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - df.loc[:, "A"] = df["A"].astype(np.int64) + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, "A"] = df["A"].astype(np.int64) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index de2d914aab229..37f5fea1d749c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -581,8 +581,11 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val): # GH 6149 # coerce similarly for setitem and loc when rows have a null-slice df = frame_for_consistency.copy() + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + err = UserWarning if isinstance(val, np.ndarray) else None with pytest.raises(TypeError, match="Invalid value"): - df.loc[:, "date"] = val + with tm.assert_produces_warning(err, match=msg): + df.loc[:, "date"] = val def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): # GH 6149 @@ -646,18 +649,21 @@ def test_loc_setitem_consistency_slice_column_len(self, using_infer_string): ] df = DataFrame(values, index=mi, columns=cols) + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" ctx = contextlib.nullcontext() if using_infer_string: ctx = pytest.raises(TypeError, match="Invalid value") with ctx: - df.loc[:, ("Respondent", "StartDate")] = to_datetime( - df.loc[:, ("Respondent", "StartDate")] - ) + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, ("Respondent", "StartDate")] = to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) with ctx: - df.loc[:, ("Respondent", "EndDate")] = to_datetime( - df.loc[:, ("Respondent", "EndDate")] - ) + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, ("Respondent", "EndDate")] = to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) if using_infer_string: # infer-objects won't infer stuff anymore @@ -1426,7 +1432,9 @@ def test_loc_setitem_single_row_categorical(self, using_infer_string): # pre-2.0 this swapped in a new array, in 2.0 it operates inplace, # consistent with non-split-path - df.loc[:, "Alpha"] = categories + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, "Alpha"] = categories result = df["Alpha"] expected = Series(categories, index=df.index, name="Alpha").astype( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 6f20d0e4e7cbf..11fa4d4b5ac80 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -318,7 +318,9 @@ def test_partial_setting_frame(self): df["B"] = df["B"].astype(np.float64) # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at # setting inplace - df.loc[:, "B"] = df.loc[:, "A"] + msg = r"Setting `df.loc\[:, col\] = values` does \*not\* change" + with tm.assert_produces_warning(UserWarning, match=msg): + df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # single dtype frame, partial setting