Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ Limit the number of NA values filled

df.ffill(limit=1)

NA values can be replaced with corresponding value from a :class:`Series`` or :class:`DataFrame``
NA values can be replaced with corresponding value from a :class:`Series` or :class:`DataFrame`
where the index and columns align between the original object and the filled object.

.. ipython:: python
Expand Down Expand Up @@ -659,6 +659,7 @@ Pass nested dictionaries of regular expressions that use the ``regex`` keyword.
Pass a list of regular expressions that will replace matches with a scalar.

.. ipython:: python
:okwarning:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can these examples be changed to not produce a warning?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated to not produce warning + green


df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True)

Expand All @@ -668,6 +669,7 @@ argument must be passed explicitly by name or ``regex`` must be a nested
dictionary.

.. ipython:: python
:okwarning:

df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan)

Expand Down
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`; explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
Expand Down
68 changes: 59 additions & 9 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,23 @@ def replace(
if not (self.is_object and value is None):
# if the user *explicitly* gave None, we keep None, otherwise
# may downcast to NaN
blocks = blk.convert(copy=False, using_cow=using_cow)
if get_option("future.no_silent_downcasting") is True:
blocks = [blk]
else:
blocks = blk.convert(copy=False, using_cow=using_cow)
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
warnings.warn(
# GH#54710
"Downcasting behavior in `replace` is deprecated and "
"will be removed in a future version. To retain the old "
"behavior, explicitly call "
"`result.infer_objects(copy=False)`. "
"To opt-in to the future "
"behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
blocks = [blk]
return blocks
Expand Down Expand Up @@ -836,7 +852,21 @@ def _replace_regex(

replace_regex(block.values, rx, value, mask)

return block.convert(copy=False, using_cow=using_cow)
nbs = block.convert(copy=False, using_cow=using_cow)
opt = get_option("future.no_silent_downcasting")
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
warnings.warn(
# GH#54710
"Downcasting behavior in `replace` is deprecated and "
"will be removed in a future version. To retain the old "
"behavior, explicitly call `result.infer_objects(copy=False)`. "
"To opt-in to the future "
"behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
FutureWarning,
stacklevel=find_stack_level(),
)
return nbs

@final
def replace_list(
Expand Down Expand Up @@ -902,6 +932,7 @@ def replace_list(
else:
rb = [self if inplace else self.copy()]

opt = get_option("future.no_silent_downcasting")
for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
convert = i == src_len # only convert once at the end
new_rb: list[Block] = []
Expand Down Expand Up @@ -939,14 +970,33 @@ def replace_list(
b.refs.referenced_blocks.index(ref)
)

if convert and blk.is_object and not all(x is None for x in dest_list):
if (
not opt
and convert
and blk.is_object
and not all(x is None for x in dest_list)
):
# GH#44498 avoid unwanted cast-back
result = extend_blocks(
[
b.convert(copy=True and not using_cow, using_cow=using_cow)
for b in result
]
)
nbs = []
for res_blk in result:
converted = res_blk.convert(
copy=True and not using_cow, using_cow=using_cow
)
if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
warnings.warn(
# GH#54710
"Downcasting behavior in `replace` is deprecated "
"and will be removed in a future version. To "
"retain the old behavior, explicitly call "
"`result.infer_objects(copy=False)`. "
"To opt-in to the future "
"behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
FutureWarning,
stacklevel=find_stack_level(),
)
nbs.extend(converted)
result = nbs
new_rb.extend(result)
rb = new_rb
return rb
Expand Down
62 changes: 44 additions & 18 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,9 @@ def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
def test_regex_replace_dict_nested_gh4115(self):
df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
result = df.replace({"Type": {"Q": 0, "T": 1}})
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace({"Type": {"Q": 0, "T": 1}})
tm.assert_frame_equal(result, expected)

def test_regex_replace_list_to_scalar(self, mix_abc):
Expand All @@ -301,16 +303,20 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
"c": [np.nan, np.nan, np.nan, "d"],
}
)
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
res2 = df.copy()
res3 = df.copy()
return_value = res2.replace(
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = res2.replace(
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
)
assert return_value is None
return_value = res3.replace(
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = res3.replace(
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
)
assert return_value is None
tm.assert_frame_equal(res, expec)
tm.assert_frame_equal(res2, expec)
Expand Down Expand Up @@ -520,7 +526,9 @@ def test_replace_convert(self):
# gh 3907
df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
m = {"foo": 1, "bar": 2, "bah": 3}
rep = df.replace(m)
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
rep = df.replace(m)
expec = Series([np.int64] * 3)
res = rep.dtypes
tm.assert_series_equal(expec, res)
Expand Down Expand Up @@ -838,7 +846,12 @@ def test_replace_for_new_dtypes(self, datetime_frame):
],
)
def test_replace_dtypes(self, frame, to_replace, value, expected):
result = frame.replace(to_replace, value)
warn = None
if isinstance(to_replace, datetime) and to_replace.year == 2920:
warn = FutureWarning
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(warn, match=msg):
result = frame.replace(to_replace, value)
tm.assert_frame_equal(result, expected)

def test_replace_input_formats_listlike(self):
Expand Down Expand Up @@ -927,7 +940,9 @@ def test_replace_dict_no_regex(self):
"Strongly Disagree": 1,
}
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
result = answer.replace(weights)
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
result = answer.replace(weights)
tm.assert_series_equal(result, expected)

def test_replace_series_no_regex(self):
Expand All @@ -950,7 +965,9 @@ def test_replace_series_no_regex(self):
}
)
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
result = answer.replace(weights)
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
result = answer.replace(weights)
tm.assert_series_equal(result, expected)

def test_replace_dict_tuple_list_ordering_remains_the_same(self):
Expand Down Expand Up @@ -1076,7 +1093,9 @@ def test_replace_period(self):

expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
assert expected.dtypes.iloc[0] == "Period[M]"
result = df.replace(d)
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace(d)
tm.assert_frame_equal(result, expected)

def test_replace_datetime(self):
Expand Down Expand Up @@ -1106,7 +1125,9 @@ def test_replace_datetime(self):
)
assert set(df.fname.values) == set(d["fname"].keys())
expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
result = df.replace(d)
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace(d)
tm.assert_frame_equal(result, expected)

def test_replace_datetimetz(self):
Expand Down Expand Up @@ -1307,10 +1328,12 @@ def test_replace_commutative(self, df, to_replace, exp):
np.float64(1),
],
)
def test_replace_replacer_dtype(self, request, replacer):
def test_replace_replacer_dtype(self, replacer):
# GH26632
df = DataFrame(["a"])
result = df.replace({"a": replacer, "b": replacer})
msg = "Downcasting behavior in `replace` "
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace({"a": replacer, "b": replacer})
expected = DataFrame([replacer])
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1564,12 +1587,15 @@ def test_replace_regex_dtype_frame(self, regex):
# GH-48644
df1 = DataFrame({"A": ["0"], "B": ["0"]})
expected_df1 = DataFrame({"A": [1], "B": [1]})
result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
tm.assert_frame_equal(result_df1, expected_df1)

df2 = DataFrame({"A": ["0"], "B": ["1"]})
expected_df2 = DataFrame({"A": [1], "B": ["1"]})
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
with tm.assert_produces_warning(FutureWarning, match=msg):
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
tm.assert_frame_equal(result_df2, expected_df2)

def test_replace_with_value_also_being_replaced(self):
Expand Down
30 changes: 24 additions & 6 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,8 +836,6 @@ def test_replace_series(self, how, to_key, from_key, replacer):
# tested below
return

result = obj.replace(replacer)

if (from_key == "float64" and to_key in ("int64")) or (
from_key == "complex128" and to_key in ("int64", "float64")
):
Expand All @@ -851,6 +849,17 @@ def test_replace_series(self, how, to_key, from_key, replacer):
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key

msg = "Downcasting behavior in `replace`"
warn = FutureWarning
if (
exp.dtype == obj.dtype
or exp.dtype == object
or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
):
warn = None
with tm.assert_produces_warning(warn, match=msg):
result = obj.replace(replacer)

tm.assert_series_equal(result, exp)

@pytest.mark.parametrize(
Expand All @@ -866,11 +875,14 @@ def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key

result = obj.replace(replacer)

exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key

msg = "Downcasting behavior in `replace`"
warn = FutureWarning if exp.dtype != object else None
with tm.assert_produces_warning(warn, match=msg):
result = obj.replace(replacer)

tm.assert_series_equal(result, exp)

@pytest.mark.parametrize(
Expand All @@ -888,16 +900,22 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer)
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key

result = obj.replace(replacer)

exp = pd.Series(self.rep[to_key], index=index, name="yyy")
warn = FutureWarning
if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
exp.dtype, pd.DatetimeTZDtype
):
# with mismatched tzs, we retain the original dtype as of 2.0
exp = exp.astype(obj.dtype)
warn = None
else:
assert exp.dtype == to_key
if to_key == from_key:
warn = None

msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(warn, match=msg):
result = obj.replace(replacer)

tm.assert_series_equal(result, exp)

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,7 +1197,9 @@ def test_render_as_column_name(self, path):
def test_true_and_false_value_options(self, path):
# see gh-13347
df = DataFrame([["foo", "bar"]], columns=["col1", "col2"])
expected = df.replace({"foo": True, "bar": False})
msg = "Downcasting behavior in `replace`"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.replace({"foo": True, "bar": False})

df.to_excel(path)
read_frame = pd.read_excel(
Expand Down
Loading