1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
@@ -214,6 +214,7 @@ Deprecations
- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`)
- The :meth:`Index.to_native_types` is deprecated. Use ``.astype(str)`` instead (:issue:`28867`)
- Deprecated indexing :class:`DataFrame` rows with datetime-like strings ``df[string]``, use ``df.loc[string]`` instead (:issue:`36179`)
- :meth:`Rolling.count` with ``min_periods=None`` will default to the size of the window in a future version (:issue:`31302`)

.. ---------------------------------------------------------------------------

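For readers of the whatsnew entry, a minimal sketch of the behaviour change (assuming a pandas build with this patch applied): leaving ``min_periods`` unset keeps working for now but is slated to change, while passing it explicitly is forward-compatible.

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, np.nan, 4.0])

# Deprecated spelling: min_periods is left as None, which currently behaves
# like min_periods=0 for count() but will default to the window size later.
s.rolling(3).count()

# Forward-compatible spelling: state min_periods explicitly.
s.rolling(3, min_periods=0).count()
```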
56 changes: 0 additions & 56 deletions pandas/_libs/window/aggregations.pyx
@@ -89,62 +89,6 @@ cdef bint is_monotonic_start_end_bounds(
# Physical description: 366 p.
# Series: Prentice-Hall Series in Automatic Computation

# ----------------------------------------------------------------------
# Rolling count
# this is only an impl for index not None, IOW, freq aware


def roll_count(
ndarray[float64_t] values,
ndarray[int64_t] start,
ndarray[int64_t] end,
int64_t minp,
):
cdef:
float64_t val, count_x = 0.0
int64_t s, e, nobs, N = len(values)
Py_ssize_t i, j
ndarray[float64_t] output

output = np.empty(N, dtype=float)

with nogil:

for i in range(0, N):
s = start[i]
e = end[i]

if i == 0:

# setup
count_x = 0.0
for j in range(s, e):
val = values[j]
if notnan(val):
count_x += 1.0

else:

# calculate deletes
for j in range(start[i - 1], s):
val = values[j]
if notnan(val):
count_x -= 1.0

# calculate adds
for j in range(end[i - 1], e):
val = values[j]
if notnan(val):
count_x += 1.0

if count_x >= minp:
output[i] = count_x
else:
output[i] = NaN

return output


# ----------------------------------------------------------------------
# Rolling sum

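The deleted ``roll_count`` kernel used a sliding add/remove scheme rather than rescanning each window. A rough pure-Python rendering of that logic, for reference; ``roll_count_py`` is a hypothetical name used only for illustration, not part of pandas.

```python
import numpy as np

def roll_count_py(values, start, end, minp):
    # Sketch of the removed kernel: keep a running count of non-NaN values,
    # subtracting entries that left the window and adding entries that
    # entered it, instead of recounting each window from scratch.
    n = len(values)
    output = np.empty(n, dtype=float)
    count = 0.0
    for i in range(n):
        s, e = start[i], end[i]
        if i == 0:
            count = float(np.count_nonzero(~np.isnan(values[s:e])))
        else:
            count -= float(np.count_nonzero(~np.isnan(values[start[i - 1]:s])))
            count += float(np.count_nonzero(~np.isnan(values[end[i - 1]:e])))
        output[i] = count if count >= minp else np.nan
    return output
```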
66 changes: 26 additions & 40 deletions pandas/core/window/rolling.py
@@ -17,6 +17,7 @@
Type,
Union,
)
import warnings

import numpy as np

@@ -471,31 +472,39 @@ def _get_window_indexer(self, window: int) -> BaseIndexer:
return VariableWindowIndexer(index_array=self._on.asi8, window_size=window)
return FixedWindowIndexer(window_size=window)

def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series":
def _apply_series(
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
) -> "Series":
"""
Series version of _apply_blockwise
"""
obj = self._create_data(self._selected_obj)

try:
values = self._prep_values(obj.values)
# GH 12541: Special case for count where we support date-like types
input = obj.values if name != "count" else notna(obj.values).astype(int)
values = self._prep_values(input)
except (TypeError, NotImplementedError) as err:
raise DataError("No numeric types to aggregate") from err

result = homogeneous_func(values)
return obj._constructor(result, index=obj.index, name=obj.name)

def _apply_blockwise(
self, homogeneous_func: Callable[..., ArrayLike]
self, homogeneous_func: Callable[..., ArrayLike], name: Optional[str] = None
) -> FrameOrSeriesUnion:
"""
Apply the given function to the DataFrame broken down into homogeneous
sub-frames.
"""
if self._selected_obj.ndim == 1:
return self._apply_series(homogeneous_func)
return self._apply_series(homogeneous_func, name)

obj = self._create_data(self._selected_obj)
if name == "count":
# GH 12541: Special case for count where we support date-like types
obj = notna(obj).astype(int)
obj._mgr = obj._mgr.consolidate()
mgr = obj._mgr

def hfunc(bvalues: ArrayLike) -> ArrayLike:
@@ -608,7 +617,7 @@ def calc(x):

return result

return self._apply_blockwise(homogeneous_func)
return self._apply_blockwise(homogeneous_func, name)

def aggregate(self, func, *args, **kwargs):
result, how = self._aggregate(func, *args, **kwargs)
@@ -1269,33 +1278,8 @@ class RollingAndExpandingMixin(RollingMixin):
)

def count(self):
# GH 32865. Using count with custom BaseIndexer subclass
# implementations shouldn't end up here
assert not isinstance(self.window, BaseIndexer)

obj = self._create_data(self._selected_obj)

def hfunc(values: np.ndarray) -> np.ndarray:
result = notna(values)
result = result.astype(int)
frame = type(obj)(result.T)
result = self._constructor(
frame,
window=self._get_window(),
min_periods=self.min_periods or 0,
center=self.center,
axis=self.axis,
closed=self.closed,
).sum()
return result.values.T

new_mgr = obj._mgr.apply(hfunc)
out = obj._constructor(new_mgr)
if obj.ndim == 1:
out.name = obj.name
else:
self._insert_on_column(out, obj)
return out
window_func = self._get_cython_func_type("roll_sum")
return self._apply(window_func, center=self.center, name="count")
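The rewritten ``count`` delegates to the rolling-sum kernel: the input is first converted to a 0/1 indicator of non-missing values (see the ``name == "count"`` branches in ``_apply_series`` and ``_apply_blockwise`` above) and then summed. A small sketch of the equivalence this relies on, using only public API:

```python
import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0, 4.0])

# A windowed count is the rolling sum of a non-missing indicator.
via_count = s.rolling(2, min_periods=0).count()
via_sum = s.notna().astype(int).rolling(2, min_periods=0).sum()

assert via_count.equals(via_sum)
```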

_shared_docs["apply"] = dedent(
r"""
@@ -2050,14 +2034,16 @@ def aggregate(self, func, *args, **kwargs):
@Substitution(name="rolling")
@Appender(_shared_docs["count"])
def count(self):

# different impl for freq counting
# GH 32865. Use a custom count function implementation
# when using a BaseIndexer subclass as a window
if self.is_freq_type or isinstance(self.window, BaseIndexer):
window_func = self._get_roll_func("roll_count")
return self._apply(window_func, center=self.center, name="count")

if self.min_periods is None:
warnings.warn(
(
"min_periods=None will default to the size of window "
"consistent with other methods in a future version. "
"Specify min_periods=0 instead."
),
DeprecationWarning,
Contributor
make this a FutureWarning. I know its loud but i think we need to.

Contributor
alt we could do this for a version and then change it.

Member Author
Changed to FutureWarning

)
self.min_periods = 0
return super().count()
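On this branch, omitting ``min_periods`` is expected to warn (the thread above settles on ``FutureWarning``) while still producing the old ``min_periods=0`` result; passing it explicitly avoids the warning. A hedged sketch of how callers might adapt, assuming a build with this patch:

```python
import warnings

import numpy as np
import pandas as pd

s = pd.Series([1.0, np.nan, 3.0])

# Old spelling: still works on this branch, but emits the deprecation warning.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", FutureWarning)
    warnings.simplefilter("ignore", DeprecationWarning)
    legacy = s.rolling(2).count()

# Preferred spelling: explicit min_periods, no warning.
explicit = s.rolling(2, min_periods=0).count()

# Holds while the old min_periods=0 default is still in place.
assert legacy.equals(explicit)
```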

@Substitution(name="rolling")
@@ -452,7 +452,7 @@ def test_moment_functions_zero_length():
df2_expected = df2

functions = [
lambda x: x.rolling(window=10).count(),
lambda x: x.rolling(window=10, min_periods=0).count(),
lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False),
lambda x: x.rolling(window=10, min_periods=5).max(),
1 change: 1 addition & 0 deletions pandas/tests/window/moments/test_moments_rolling.py
@@ -223,6 +223,7 @@ def test_rolling_sum(raw, series, frame):
)


@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling_count(raw, series, frame):
counter = lambda x: np.isfinite(x).astype(float).sum()
_check_moment_func(
1 change: 1 addition & 0 deletions pandas/tests/window/test_base_indexer.py
@@ -138,6 +138,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed):
),
],
)
@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling_forward_window(constructor, func, np_func, expected, np_kwargs):
# GH 32865
values = np.arange(10.0)
103 changes: 66 additions & 37 deletions pandas/tests/window/test_dtypes.py
@@ -21,82 +21,111 @@ def get_dtype(dtype, coerce_int=None):


@pytest.mark.parametrize(
"method, data, expected_data, coerce_int",
"method, data, expected_data, coerce_int, min_periods",
[
("count", np.arange(5), [1, 2, 2, 2, 2], True),
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True),
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False),
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True),
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True),
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False),
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True),
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True),
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False),
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True),
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True),
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False),
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True),
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True),
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False),
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True),
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True),
("count", np.arange(5), [1, 2, 2, 2, 2], True, 0),
("count", np.arange(10, 0, -2), [1, 2, 2, 2, 2], True, 0),
("count", [0, 1, 2, np.nan, 4], [1, 2, 2, 1, 1], False, 0),
("max", np.arange(5), [np.nan, 1, 2, 3, 4], True, None),
("max", np.arange(10, 0, -2), [np.nan, 10, 8, 6, 4], True, None),
("max", [0, 1, 2, np.nan, 4], [np.nan, 1, 2, np.nan, np.nan], False, None),
("min", np.arange(5), [np.nan, 0, 1, 2, 3], True, None),
("min", np.arange(10, 0, -2), [np.nan, 8, 6, 4, 2], True, None),
("min", [0, 1, 2, np.nan, 4], [np.nan, 0, 1, np.nan, np.nan], False, None),
("sum", np.arange(5), [np.nan, 1, 3, 5, 7], True, None),
("sum", np.arange(10, 0, -2), [np.nan, 18, 14, 10, 6], True, None),
("sum", [0, 1, 2, np.nan, 4], [np.nan, 1, 3, np.nan, np.nan], False, None),
("mean", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("mean", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
("mean", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False, None),
("std", np.arange(5), [np.nan] + [np.sqrt(0.5)] * 4, True, None),
("std", np.arange(10, 0, -2), [np.nan] + [np.sqrt(2)] * 4, True, None),
(
"std",
[0, 1, 2, np.nan, 4],
[np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2,
False,
None,
),
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True, None),
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True, None),
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False, None),
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True, None),
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True, None),
(
"median",
[0, 1, 2, np.nan, 4],
[np.nan, 0.5, 1.5, np.nan, np.nan],
False,
None,
),
("var", np.arange(5), [np.nan, 0.5, 0.5, 0.5, 0.5], True),
("var", np.arange(10, 0, -2), [np.nan, 2, 2, 2, 2], True),
("var", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 0.5, np.nan, np.nan], False),
("median", np.arange(5), [np.nan, 0.5, 1.5, 2.5, 3.5], True),
("median", np.arange(10, 0, -2), [np.nan, 9, 7, 5, 3], True),
("median", [0, 1, 2, np.nan, 4], [np.nan, 0.5, 1.5, np.nan, np.nan], False),
],
)
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes):
def test_series_dtypes(method, data, expected_data, coerce_int, dtypes, min_periods):
s = Series(data, dtype=get_dtype(dtypes, coerce_int=coerce_int))
if dtypes in ("m8[ns]", "M8[ns]") and method != "count":
msg = "No numeric types to aggregate"
with pytest.raises(DataError, match=msg):
getattr(s.rolling(2), method)()
getattr(s.rolling(2, min_periods=min_periods), method)()
else:
result = getattr(s.rolling(2), method)()
result = getattr(s.rolling(2, min_periods=min_periods), method)()
expected = Series(expected_data, dtype="float64")
tm.assert_almost_equal(result, expected)


@pytest.mark.parametrize(
"method, expected_data",
"method, expected_data, min_periods",
[
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}),
("max", {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}),
("min", {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}),
("count", {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, 0),
(
"max",
{0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])},
None,
),
(
"min",
{0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])},
None,
),
(
"sum",
{0: Series([np.nan, 2, 6, 10, 14]), 1: Series([np.nan, 4, 8, 12, 16])},
None,
),
(
"mean",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
("mean", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}),
(
"std",
{
0: Series([np.nan] + [np.sqrt(2)] * 4),
1: Series([np.nan] + [np.sqrt(2)] * 4),
},
None,
),
(
"var",
{0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])},
None,
),
(
"median",
{0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])},
None,
),
("var", {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}),
("median", {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}),
],
)
def test_dataframe_dtypes(method, expected_data, dtypes):
def test_dataframe_dtypes(method, expected_data, dtypes, min_periods):
if dtypes == "category":
pytest.skip("Category dataframe testing not implemented.")
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=get_dtype(dtypes))
if dtypes in ("m8[ns]", "M8[ns]") and method != "count":
msg = "No numeric types to aggregate"
with pytest.raises(DataError, match=msg):
getattr(df.rolling(2), method)()
getattr(df.rolling(2, min_periods=min_periods), method)()
else:
result = getattr(df.rolling(2), method)()
result = getattr(df.rolling(2, min_periods=min_periods), method)()
expected = DataFrame(expected_data, dtype="float64")
tm.assert_frame_equal(result, expected)
5 changes: 3 additions & 2 deletions pandas/tests/window/test_grouper.py
@@ -45,16 +45,17 @@ def test_getitem_multiple(self):

# GH 13174
g = self.frame.groupby("A")
r = g.rolling(2)
r = g.rolling(2, min_periods=0)
g_mutated = get_groupby(self.frame, by="A", mutated=True)
expected = g_mutated.B.apply(lambda x: x.rolling(2).count())
expected = g_mutated.B.apply(lambda x: x.rolling(2, min_periods=0).count())

result = r.B.count()
tm.assert_series_equal(result, expected)

result = r.B.count()
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:min_periods:DeprecationWarning")
def test_rolling(self):
g = self.frame.groupby("A")
r = g.rolling(window=4)
4 changes: 3 additions & 1 deletion pandas/tests/window/test_rolling.py
@@ -455,7 +455,9 @@ def test_rolling_count_default_min_periods_with_null_values(constructor):
values = [1, 2, 3, np.nan, 4, 5, 6]
expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0]

result = constructor(values).rolling(3).count()
# GH 31302
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
result = constructor(values).rolling(3).count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)
