Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ this pathological behavior (:issue:`37827`):
*New behavior*:

.. ipython:: python
:okwarning:

df.mean()

Expand All @@ -394,6 +395,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:`
:issue:`28949`, :issue:`21020`).

.. ipython:: python
:okwarning:

ser = pd.Series([0, 1], dtype="category", name="A")
df = ser.to_frame()
Expand All @@ -411,6 +413,7 @@ instead of casting to a NumPy array which may have different semantics (:issue:`
*New behavior*:

.. ipython:: python
:okwarning:

df.any()

Expand Down
39 changes: 39 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,45 @@ Deprecations
- Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`)
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)

.. _whatsnew_130.deprecations.nuisance_columns:

Deprecated Dropping Nuisance Columns in DataFrame Reductions and DataFrameGroupBy Operations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
When calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with
The default of calling a reduction (.min, .max, .sum, ...) on a :class:`DataFrame` with
``numeric_only=None`` will silently ignore and drop from the result nuisance columns, e.g. a string column in a .mean() reduction.

``numeric_only=None`` (the default), columns on which the reduction raises ``TypeError``
are silently ignored and dropped from the result. This behavior is deprecated.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Start a new paragraph with 'This behavior is deprecated'

In a future version, the ``TypeError`` will be raised, and users will need to
select only valid columns before calling the function.

For example:

.. ipython:: python

df = pd.DataFrame({"A": [1, 2, 3, 4], "B": pd.date_range("2016-01-01", periods=4)})

*Old behavior*:

.. code-block:: ipython

In [3]: df.prod()
Out[3]:
A 24
dtype: int64

*Future behavior*:

.. code-block:: ipython

In [4]: df.prod()
...
TypeError: 'DatetimeArray' does not implement reduction 'prod'

In [5]: df[["A"]].prod()
Out[5]:
A 24
dtype: int64

.. ---------------------------------------------------------------------------


Expand Down
28 changes: 28 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9801,6 +9801,21 @@ def _get_data() -> DataFrame:
# Even if we are object dtype, follow numpy and return
# float64, see test_apply_funcs_over_empty
out = out.astype(np.float64)

if numeric_only is None and out.shape[0] != df.shape[1]:
# columns have been dropped GH#41480
arg_name = "numeric_only"
if name in ["all", "any"]:
arg_name = "bool_only"
warnings.warn(
"Dropping of nuisance columns in DataFrame reductions "
f"(with '{arg_name}=None') is deprecated; in a future "
"version this will raise TypeError. Select only valid "
"columns before calling the reduction.",
FutureWarning,
stacklevel=5,
)

return out

assert numeric_only is None
Expand All @@ -9821,6 +9836,19 @@ def _get_data() -> DataFrame:
with np.errstate(all="ignore"):
result = func(values)

# columns have been dropped GH#41480
arg_name = "numeric_only"
if name in ["all", "any"]:
arg_name = "bool_only"
warnings.warn(
"Dropping of nuisance columns in DataFrame reductions "
f"(with '{arg_name}=None') is deprecated; in a future "
"version this will raise TypeError. Select only valid "
"columns before calling the reduction.",
FutureWarning,
stacklevel=5,
)

if hasattr(result, "dtype"):
if filter_type == "bool" and notna(result).all():
result = result.astype(np.bool_)
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,10 @@ def test_nuiscance_columns():
)
tm.assert_frame_equal(result, expected)

result = df.agg("sum")
with tm.assert_produces_warning(
FutureWarning, match="Select only valid", check_stacklevel=False
):
result = df.agg("sum")
expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1427,8 +1430,9 @@ def test_apply_datetime_tz_issue():
@pytest.mark.parametrize("method", ["min", "max", "sum"])
def test_consistency_of_aggregates_of_columns_with_missing_values(df, method):
# GH 16832
none_in_first_column_result = getattr(df[["A", "B"]], method)()
none_in_second_column_result = getattr(df[["B", "A"]], method)()
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
none_in_first_column_result = getattr(df[["A", "B"]], method)()
none_in_second_column_result = getattr(df[["B", "A"]], method)()

tm.assert_series_equal(none_in_first_column_result, none_in_second_column_result)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def test_transform_wont_agg_series(string_series, func):
@pytest.mark.parametrize(
"op_wrapper", [lambda x: x, lambda x: [x], lambda x: {"A": x}, lambda x: {"A": [x]}]
)
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper):
# GH 35964
op = op_wrapper(all_reductions)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ def test_quantile(self, datetime_frame):
# non-numeric exclusion
df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]})
rs = df.quantile(0.5)
xp = df.median().rename(0.5)
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
xp = df.median().rename(0.5)
tm.assert_series_equal(rs, xp)

# axis
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/methods/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ def test_rank_methods_frame(self):

@td.skip_array_manager_not_yet_implemented
@pytest.mark.parametrize("dtype", ["O", "f8", "i8"])
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
def test_rank_descending(self, method, dtype):

if "i" in dtype:
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,7 @@ def test_zero_len_frame_with_series_corner_cases():
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
def test_frame_single_columns_object_sum_axis_1():
# GH 13758
data = {
Expand Down
Loading