Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Backwards incompatible API changes

Deprecations
~~~~~~~~~~~~

- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
-
-

Expand Down
21 changes: 20 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7921,6 +7921,19 @@ def _count_level(self, level, axis=0, numeric_only=False):
def _reduce(
self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
):

dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
warnings.warn(
"DataFrame.mean and DataFrame.median with numeric_only=None "
"will include datetime64 and datetime64tz columns in a "
"future version.",
FutureWarning,
stacklevel=3,
)
cols = self.columns[~dtype_is_dt]
self = self[cols]

if axis is None and filter_type == "bool":
labels = None
constructor = None
Expand Down Expand Up @@ -7960,9 +7973,15 @@ def _get_data(axis_matters):

out_dtype = "bool" if filter_type == "bool" else None

def blk_func(values):
    """
    Apply ``op`` to ``values``, picking the axis the values can accept.

    ``op``, ``skipna`` and ``kwds`` are taken from the enclosing
    ``_reduce`` call.

    Parameters
    ----------
    values : array-like
        Object exposing ``ndim``; either a ``np.ndarray`` or another
        array type.

    Returns
    -------
    The result of ``op(values, axis=..., skipna=skipna, **kwds)``.
    """
    one_dim_non_ndarray = values.ndim == 1 and not isinstance(values, np.ndarray)
    # 1-D values that are not ndarrays cannot be passed axis=1, so they
    # are reduced along axis=0; everything else is reduced along axis=1.
    reduce_axis = 0 if one_dim_non_ndarray else 1
    return op(values, axis=reduce_axis, skipna=skipna, **kwds)

# After possibly calling _get_data and transposing, we are now in the
# simple case where we can use BlockManager.reduce
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
res = df._data.reduce(blk_func)
assert isinstance(res, dict)
if len(res):
assert len(res) == max(list(res.keys())) + 1, res.keys()
Expand Down
17 changes: 11 additions & 6 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn")
Expand Down Expand Up @@ -519,7 +518,6 @@ def nansum(
return _wrap_results(the_sum, dtype)


@disallow("M8", DatetimeTZDtype)
@bottleneck_switch()
def nanmean(values, axis=None, skipna=True, mask=None):
"""
Expand Down Expand Up @@ -577,7 +575,6 @@ def nanmean(values, axis=None, skipna=True, mask=None):
return _wrap_results(the_mean, dtype)


@disallow("M8")
@bottleneck_switch()
def nanmedian(values, axis=None, skipna=True, mask=None):
"""
Expand Down Expand Up @@ -610,8 +607,12 @@ def get_median(x):
return np.nanmedian(x[mask])

values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask)
if not is_float_dtype(values):
values = values.astype("f8")
if not is_float_dtype(values.dtype):
try:
values = values.astype("f8")
except ValueError:
# e.g. "could not convert string to float: 'a'"
raise TypeError
if mask is not None:
values[mask] = np.nan

Expand Down Expand Up @@ -1359,7 +1360,11 @@ def _ensure_numeric(x):
try:
x = x.astype(np.complex128)
except (TypeError, ValueError):
x = x.astype(np.float64)
try:
x = x.astype(np.float64)
except ValueError:
# GH#29941 we get here with object arrays containing strs
raise TypeError(f"Could not convert {x} to numeric")
else:
if not np.any(np.imag(x)):
x = x.real
Expand Down
18 changes: 13 additions & 5 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,15 @@ def assert_stat_op_calc(
f = getattr(frame, opname)

if check_dates:
expected_warning = FutureWarning if opname in ["mean", "median"] else None
df = DataFrame({"b": date_range("1/1/2001", periods=2)})
result = getattr(df, opname)()
with tm.assert_produces_warning(expected_warning):
result = getattr(df, opname)()
assert isinstance(result, Series)

df["a"] = range(len(df))
result = getattr(df, opname)()
with tm.assert_produces_warning(expected_warning):
result = getattr(df, opname)()
assert isinstance(result, Series)
assert len(result)

Expand Down Expand Up @@ -460,7 +463,8 @@ def test_nunique(self):
def test_mean_mixed_datetime_numeric(self, tz):
# https://github.com/pandas-dev/pandas/issues/24752
df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2})
result = df.mean()
with tm.assert_produces_warning(FutureWarning):
result = df.mean()
expected = pd.Series([1.0], index=["A"])
tm.assert_series_equal(result, expected)

Expand All @@ -470,7 +474,9 @@ def test_mean_excludes_datetimes(self, tz):
# Our long-term desired behavior is unclear, but the behavior in
# 0.24.0rc1 was buggy.
df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2})
result = df.mean()
with tm.assert_produces_warning(FutureWarning):
result = df.mean()

expected = pd.Series(dtype=np.float64)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -866,7 +872,9 @@ def test_mean_datetimelike(self):
expected = pd.Series({"A": 1.0})
tm.assert_series_equal(result, expected)

result = df.mean()
with tm.assert_produces_warning(FutureWarning):
# in the future datetime columns will be included
result = df.mean()
expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
tm.assert_series_equal(result, expected)

Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ def test_ndarray(self):

# Test non-convertible string ndarray
s_values = np.array(["foo", "bar", "baz"], dtype=object)
msg = r"could not convert string to float: '(foo|baz)'"
with pytest.raises(ValueError, match=msg):
msg = r"Could not convert .* to numeric"
with pytest.raises(TypeError, match=msg):
nanops._ensure_numeric(s_values)

def test_convertable_values(self):
Expand Down Expand Up @@ -993,7 +993,6 @@ def prng(self):

class TestDatetime64NaNOps:
@pytest.mark.parametrize("tz", [None, "UTC"])
@pytest.mark.xfail(reason="disabled")
# Enabling mean changes the behavior of DataFrame.mean
# See https://github.com/pandas-dev/pandas/issues/24752
def test_nanmean(self, tz):
Expand Down