Skip to content

Commit

Permalink
BUG: df.resample('MS', closed='right') incorrectly places bins (#55283)
Browse files Browse the repository at this point in the history
* hardcode allowed freqs for hack

* whatsnew

* tests for business day issues

* move whatsnew to 2.1.2
  • Loading branch information
MarcoGorelli authored Sep 26, 2023
1 parent 1f16762 commit 98f5a78
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 3 deletions.
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v2.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ including other versions of pandas.

Fixed regressions
~~~~~~~~~~~~~~~~~
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.MonthBegin` (:issue:`55271`)
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
-

Expand All @@ -21,7 +22,8 @@ Fixed regressions

Bug fixes
~~~~~~~~~
-
- Fixed bug in :meth:`DataFrame.resample` not respecting ``closed`` and ``label`` arguments for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55282`)
- Fixed bug in :meth:`DataFrame.resample` where bin edges were not correct for :class:`~pandas.tseries.offsets.BusinessDay` (:issue:`55281`)
-

.. ---------------------------------------------------------------------------
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2297,7 +2297,17 @@ def _adjust_bin_edges(
) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
# Some hacks for > daily data, see #1471, #1458, #1483

if self.freq != "D" and is_superperiod(self.freq, "D"):
if self.freq.name in ("BM", "ME", "W") or self.freq.name.split("-")[0] in (
"BQ",
"BA",
"Q",
"A",
"W",
):
# If the right end-point is on the last day of the month, roll forwards
# until the last moment of that day. Note that we only do this for offsets
# which correspond to the end of a super-daily period - "month start", for
# example, is excluded.
if self.closed == "right":
# GH 21459, GH 9119: Adjust the bins relative to the wall time
edges_dti = binner.tz_localize(None)
Expand Down
65 changes: 64 additions & 1 deletion pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ def test_resample_reresample(unit):
s = Series(np.random.default_rng(2).random(len(dti)), dti)
bs = s.resample("B", closed="right", label="right").mean()
result = bs.resample("8H").mean()
assert len(result) == 22
assert len(result) == 25
assert isinstance(result.index.freq, offsets.DateOffset)
assert result.index.freq == offsets.Hour(8)

Expand Down Expand Up @@ -2051,3 +2051,66 @@ def test_resample_M_deprecated():
with tm.assert_produces_warning(UserWarning, match=depr_msg):
result = s.resample("2M").mean()
tm.assert_series_equal(result, expected)


def test_resample_ms_closed_right():
    # Regression test for https://github.com/pandas-dev/pandas/issues/55271
    # 6000 one-minute stamps: 2020-01-31 00:00 through 2020-02-04 03:59.
    minutes = date_range(start="2020-01-31", freq="1min", periods=6000)
    frame = DataFrame({"ts": minutes}, index=minutes)

    result = frame.resample("MS", closed="right").last()

    # With right-closed bins, the stamp sitting exactly on 2020-02-01 00:00 is
    # the last row of the bin labelled 2020-01-01; everything after it lands
    # in the bin labelled 2020-02-01.
    expected_labels = DatetimeIndex(
        [datetime(2020, 1, 1), datetime(2020, 2, 1)], freq="MS"
    )
    expected_last = [datetime(2020, 2, 1), datetime(2020, 2, 4, 3, 59)]
    expected = DataFrame({"ts": expected_last}, index=expected_labels)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("freq", ["B", "C"])
def test_resample_c_b_closed_right(freq: str):
    # Regression test for https://github.com/pandas-dev/pandas/issues/55281
    # 6000 one-minute stamps: 2020-01-31 00:00 through 2020-02-04 03:59.
    minutes = date_range(start="2020-01-31", freq="1min", periods=6000)
    frame = DataFrame({"ts": minutes}, index=minutes)

    result = frame.resample(freq, closed="right").last()

    # Bin labels are consecutive business days (the 2020-02-01/02 weekend has
    # no label of its own).
    expected_labels = DatetimeIndex(
        [
            datetime(2020, 1, 30),
            datetime(2020, 1, 31),
            datetime(2020, 2, 3),
            datetime(2020, 2, 4),
        ],
        freq=freq,
    )
    # Each midnight stamp closes the bin labelled with the preceding business
    # day; the final bin ends at the last stamp in the data.
    expected_last = [
        datetime(2020, 1, 31),
        datetime(2020, 2, 3),
        datetime(2020, 2, 4),
        datetime(2020, 2, 4, 3, 59),
    ]
    expected = DataFrame({"ts": expected_last}, index=expected_labels)
    tm.assert_frame_equal(result, expected)


def test_resample_b_55282():
# https://github.com/pandas-dev/pandas/issues/55282
s = Series(
[1, 2, 3, 4, 5, 6], index=date_range("2023-09-26", periods=6, freq="12H")
)
result = s.resample("B", closed="right", label="right").mean()
expected = Series(
[1.0, 2.5, 4.5, 6.0],
index=DatetimeIndex(
[
datetime(2023, 9, 26),
datetime(2023, 9, 27),
datetime(2023, 9, 28),
datetime(2023, 9, 29),
],
freq="B",
),
)
tm.assert_series_equal(result, expected)

0 comments on commit 98f5a78

Please sign in to comment.