1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
@@ -214,6 +214,7 @@ Deprecations
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
- The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`)
- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`)
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)

.. ---------------------------------------------------------------------------

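For readers of the whatsnew entry, a minimal sketch of the behaviour change (assuming a pandas build with this patch applied): leaving ``min_periods`` unset keeps working for now but is slated to change, while passing it explicitly is forward-compatible.

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, np.nan, 4.0])

# Deprecated spelling: min_periods is left as None, which currently behaves
# like min_periods=0 for count() but will default to the window size later.
s.rolling(3).count()

# Forward-compatible spelling: state min_periods explicitly.
s.rolling(3, min_periods=0).count()
```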
56 changes: 0 additions & 56 deletions pandas/_libs/window/aggregations.pyx
@@ -89,62 +89,6 @@ cdef bint is_monotonic_start_end_bounds(
# Physical description: 366 p.
# Series: Prentice-Hall Series in Automatic Computation

# ----------------------------------------------------------------------
# Rolling count
# this is only an impl for index not None, IOW, freq aware


def roll_count(
ndarray[float64_t] values,
ndarray[int64_t] start,
ndarray[int64_t] end,
int64_t minp,
):
cdef:
float64_t val, count_x = 0.0
int64_t s, e, nobs, N = len(values)
Py_ssize_t i, j
ndarray[float64_t] output

output = np.empty(N, dtype=float)

with nogil:

for i in range(0, N):
s = start[i]
e = end[i]

if i == 0:

# setup
count_x = 0.0
for j in range(s, e):
val = values[j]
if notnan(val):
count_x += 1.0

else:

# calculate deletes
for j in range(start[i - 1], s):
val = values[j]
if notnan(val):
count_x -= 1.0

# calculate adds
for j in range(end[i - 1], e):
val = values[j]
if notnan(val):
count_x += 1.0

if count_x >= minp:
output[i] = count_x
else:
output[i] = NaN

return output


# ----------------------------------------------------------------------
# Rolling sum

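The deleted ``roll_count`` kernel used a sliding add/remove scheme rather than rescanning each window. A rough pure-Python rendering of that logic, for reference; ``roll_count_py`` is a hypothetical name used only for illustration, not part of pandas.

```python
import numpy as np

def roll_count_py(values, start, end, minp):
    # Sketch of the removed kernel: keep a running count of non-NaN values,
    # subtracting entries that left the window and adding entries that
    # entered it, instead of recounting each window from scratch.
    n = len(values)
    output = np.empty(n, dtype=float)
    count = 0.0
    for i in range(n):
        s, e = start[i], end[i]
        if i == 0:
            count = float(np.count_nonzero(~np.isnan(values[s:e])))
        else:
            count -= float(np.count_nonzero(~np.isnan(values[start[i - 1]:s])))
            count += float(np.count_nonzero(~np.isnan(values[end[i - 1]:e])))
        output[i] = count if count >= minp else np.nan
    return output
```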
66 changes: 26 additions & 40 deletions pandas/core/window/rolling.py
@@ -17,6 +17,7 @@
Type,
Union,
)
import warnings

import numpy as np

@@ -471,31 +472,39 @@ def _get_window_indexer(self, window: int) -> BaseIndexer:
return VariableWindowIndexer(index_array=self._on.asi8, window_size=window)
return FixedWindowIndexer(window_size=window)

def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series":
def _apply_series(
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
) -> "Series":
"""
Series version of _apply_blockwise
"""
obj = self._create_data(self._selected_obj)

try:
values = self._prep_values(obj.values)
# GH 12541: Special case for count where we support date-like types
input = obj.values if name != "count" else notna(obj.values).astype(int)
values = self._prep_values(input)
except (TypeError, NotImplementedError) as err:
raise DataError("No numeric types to aggregate") from err

result = homogeneous_func(values)
return obj._constructor(result, index=obj.index, name=obj.name)

def _apply_blockwise(
self, homogeneous_func: Callable[..., ArrayLike]
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
) -> FrameOrSeriesUnion:
"""
Apply the given function to the DataFrame broken down into homogeneous
sub-frames.
"""
if self._selected_obj.ndim == 1:
return self._apply_series(homogeneous_func)
return self._apply_series(homogeneous_func, name)

obj = self._create_data(self._selected_obj)
if name == "count":
# GH 12541: Special case for count where we support date-like types
obj = notna(obj).astype(int)
obj._mgr = obj._mgr.consolidate()
mgr = obj._mgr

def hfunc(bvalues: ArrayLike) -> ArrayLike:
@@ -608,7 +617,7 @@ def calc(x):

return result

return self._apply_blockwise(homogeneous_func)
return self._apply_blockwise(homogeneous_func, name)

def aggregate(self, func, *args, **kwargs):
result, how = self._aggregate(func, *args, **kwargs)
@@ -1269,33 +1278,8 @@ class RollingAndExpandingMixin(RollingMixin):
)

def count(self):
# GH 32865. Using count with custom BaseIndexer subclass
# implementations shouldn't end up here
assert not isinstance(self.window, BaseIndexer)

obj = self._create_data(self._selected_obj)

def hfunc(values: np.ndarray) -> np.ndarray:
result = notna(values)
result = result.astype(int)
frame = type(obj)(result.T)
result = self._constructor(
frame,
window=self._get_window(),
min_periods=self.min_periods or 0,
center=self.center,
axis=self.axis,
closed=self.closed,
).sum()
return result.values.T

new_mgr = obj._mgr.apply(hfunc)
out = obj._constructor(new_mgr)
if obj.ndim == 1:
out.name = obj.name
else:
self._insert_on_column(out, obj)
return out
window_func = self._get_cython_func_type("roll_sum")
return self._apply(window_func, center=self.center, name="count")
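The rewritten ``count`` delegates to the rolling-sum kernel: the input is first converted to a 0/1 indicator of non-missing values (see the ``name == "count"`` branches in ``_apply_series`` and ``_apply_blockwise`` above) and then summed. A small sketch of the equivalence this relies on, using only public API:

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0, 4.0])

# A windowed count is the rolling sum of a non-missing indicator.
via_count = s.rolling(2, min_periods=0).count()
via_sum = s.notna().astype(int).rolling(2, min_periods=0).sum()

assert via_count.equals(via_sum)
```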

_shared_docs["apply"] = dedent(
r"""
@@ -2050,14 +2034,16 @@ def aggregate(self, func, *args, **kwargs):
@Substitution(name="rolling")
@Appender(_shared_docs["count"])
def count(self):

# different impl for freq counting
# GH 32865. Use a custom count function implementation
# when using a BaseIndexer subclass as a window
if self.is_freq_type or isinstance(self.window, BaseIndexer):
window_func = self._get_roll_func("roll_count")
return self._apply(window_func, center=self.center, name="count")

if self.min_periods is None:
warnings.warn(
(
"min_periods=None will default to the size of window "
"consistent with other methods in a future version. "
"Specify min_periods=0 instead."
),
DeprecationWarning,
Contributor
make this a FutureWarning. I know its loud but i think we need to.

Contributor
alt we could do this for a version and then change it.

Member Author
Changed to FutureWarning

)
self.min_periods = 0
return super().count()
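On this branch, omitting ``min_periods`` is expected to warn (the thread above settles on ``FutureWarning``) while still producing the old ``min_periods=0`` result; passing it explicitly avoids the warning. A hedged sketch of how callers might adapt, assuming a build with this patch:

```python
import warnings

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])

# Old spelling: still works on this branch, but emits the deprecation warning.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    warnings.simplefilter("ignore", DeprecationWarning)
    legacy = s.rolling(2).count()

# Preferred spelling: explicit min_periods, no warning.
explicit = s.rolling(2, min_periods=0).count()

# Holds while the old min_periods=0 default is still in place.
assert legacy.equals(explicit)
```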

@Substitution(name="rolling")
@@ -452,7 +452,7 @@ def test_moment_functions_zero_length():
df2_expected = df2

functions = [
lambda x: x.rolling(window=10).count(),
lambda x: x.rolling(window=10, min_periods=0).count(),
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).max(),
1 change: 1 addition & 0 deletions pandas/tests/window/moments/test_moments_rolling.py
@@ -223,6 +223,7 @@ def test_rolling_sum(raw, series, frame):
)


@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling_count(raw, series, frame):
counter = lambda x: np.isfinite(x).astype(float).sum()
_check_moment_func(
1 change: 1 addition & 0 deletions pandas/tests/window/test_base_indexer.py
@@ -138,6 +138,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
),
],
)
@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
# GH 32865
values = np.arange(10.0)
103 changes: 66 additions & 37 deletions pandas/tests/window/test_dtypes.py
@@ -21,82 +21,111 @@ def get_dtype(dtype, coerce_int=None):


@pytest.mark.parametrize(
"method, data, expected_data, coerce_int",
"method, data, expected_data, coerce_int, min_periods",
[
("count", np.arange(5), [1, 2, 2, 2, 2], True),
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True),
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False),
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True),
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True),
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False),
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True),
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True),
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False),
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True),
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True),
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False),
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True),
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True),
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False),
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True),
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True),
("count", np.arange(5), [1, 2, 2, 2, 2], True, 0),
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0),
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0),
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None),
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None),
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None),
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None),
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None),
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None),
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None),
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None),
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None),
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None),
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None),
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None),
(
"std",
[0, 1, 2, np.nan, 4],
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2,
False,
None,
),
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None),
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None),
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None),
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
(
"median",
[0, 1, 2, np.nan, 4],
[np.nan, 0.5, 1.5, np.nan, np.nan],
False,
None,
),
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True),
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True),
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False),
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True),
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True),
("median", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False),
],
)
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes):
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods):
s = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
if dtypes in ("m8[ns]", "M8[ns]") and method != "count":
msg = "No numeric types to aggregate"
with pytest.raises(DataError, match=msg):
getattr(s.rolling(2), method)()
getattr(s.rolling(2, min_periods=min_periods), method)()
else:
result = getattr(s.rolling(2), method)()
result = getattr(s.rolling(2, min_periods=min_periods), method)()
expected = Series(expected_data, dtype="float64")
tm.assert_almost_equal(result, expected)


@pytest.mark.parametrize(
"method, expected_data",
"method, expected_data, min_periods",
[
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}),
("max", {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}),
("min", {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}),
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0),
(
"max",
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
None,
),
(
"min",
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
None,
),
(
"sum",
{0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])},
None,
),
(
"mean",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
("mean", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}),
(
"std",
{
0: Series([np.nan] + [np.sqrt(2)] * 4),
1: Series([np.nan] + [np.sqrt(2)] * 4),
},
None,
),
(
"var",
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
None,
),
(
"median",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
("var", {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}),
("median", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}),
],
)
def test_dataframe_dtypes(method, expected_data, dtypes):
def test_dataframe_dtypes(method, expected_data, dtypes, min_periods):
if dtypes == "category":
pytest.skip("Category dataframe testing not implemented.")
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
if dtypes in ("m8[ns]", "M8[ns]") and method != "count":
msg = "No numeric types to aggregate"
with pytest.raises(DataError, match=msg):
getattr(df.rolling(2), method)()
getattr(df.rolling(2, min_periods=min_periods), method)()
else:
result = getattr(df.rolling(2), method)()
result = getattr(df.rolling(2, min_periods=min_periods), method)()
expected = DataFrame(expected_data, dtype="float64")
tm.assert_frame_equal(result, expected)
5 changes: 3 additions & 2 deletions pandas/tests/window/test_grouper.py
@@ -45,16 +45,17 @@ def test_getitem_multiple(self):

# GH 13174
g = self.frame.groupby("A")
r = g.rolling(2)
r = g.rolling(2, min_periods=0)
g_mutated = get_groupby(self.frame, by="A", mutated=True)
expected = g_mutated.B.apply(lambda x: x.rolling(2).count())
expected = g_mutated.B.apply(lambda x: x.rolling(2, min_periods=0).count())

result = r.B.count()
tm.assert_series_equal(result, expected)

result = r.B.count()
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling(self):
g = self.frame.groupby("A")
r = g.rolling(window=4)
4 changes: 3 additions & 1 deletion pandas/tests/window/test_rolling.py
@@ -455,7 +455,9 @@ def test_rolling_count_default_min_periods_with_null_values(constructor):
values = [1, 2, 3, np.nan, 4, 5, 6]
expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]

result = constructor(values).rolling(3).count()
# GH 31302
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
result = constructor(values).rolling(3).count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)
