Skip to content

Commit

Permalink
Change .groupby fastpath to work for monotonic increasing and decreas…
Browse files Browse the repository at this point in the history
…ing (#7427)

* Fix GH6220

This fixes GH6220 which makes it possible to use the fast path for .groupby for monotonically increasing and decreasing values.

* Implemented groupby tests as described by feedback

* Implemented groupby tests as described by feedback

* Minor test.

* Test resampling error with monotonic decreasing data.

* Fix test.

* Added feature to whats-new.rst

* Update whats-new.rst

* main

* flox test

* Update xarray/tests/test_groupby.py

Co-authored-by: Michael Niklas  <[email protected]>

---------

Co-authored-by: dcherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Michael Niklas <[email protected]>
  • Loading branch information
4 people authored Jul 26, 2024
1 parent d0048ef commit 0023e5d
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ v2024.06.1 (unreleased)

New Features
~~~~~~~~~~~~
- Use fastpath when grouping both montonically increasing and decreasing variable
in :py:class:`GroupBy` (:issue:`6220`, :pull:`7427`). By `Joel Jaeschke <https://github.com/joeljaeschke>`_.
- Introduce new :py:class:`groupers.UniqueGrouper`, :py:class:`groupers.BinGrouper`, and
:py:class:`groupers.TimeResampler` objects as a step towards supporting grouping by
multiple variables. See the `docs <groupby.groupers_>` and the
Expand Down
6 changes: 4 additions & 2 deletions xarray/core/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ def is_unique_and_monotonic(self) -> bool:
if isinstance(self.group, _DummyGroup):
return True
index = self.group_as_index
return index.is_unique and index.is_monotonic_increasing
return index.is_unique and (
index.is_monotonic_increasing or index.is_monotonic_decreasing
)

@property
def group_as_index(self) -> pd.Index:
Expand Down Expand Up @@ -326,7 +328,7 @@ def _init_properties(self, group: T_Group) -> None:

if not group_as_index.is_monotonic_increasing:
# TODO: sort instead of raising an error
raise ValueError("index must be monotonic for resampling")
raise ValueError("Index must be monotonic for resampling")

if isinstance(group_as_index, CFTimeIndex):
from xarray.core.resample_cftime import CFTimeGrouper
Expand Down
23 changes: 22 additions & 1 deletion xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1792,6 +1792,23 @@ def test_groupby_fillna(self) -> None:
actual = a.groupby("b").fillna(DataArray([0, 2], dims="b"))
assert_identical(expected, actual)

@pytest.mark.parametrize("use_flox", [True, False])
def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None:
# Fixes https://github.com/pydata/xarray/issues/6220
# Fixes https://github.com/pydata/xarray/issues/9279
index = [1, 2, 3, 4, 7, 9, 10]
array = DataArray(np.arange(len(index)), [("idx", index)])
array_rev = array.copy().assign_coords({"idx": index[::-1]})
fwd = array.groupby("idx", squeeze=False)
rev = array_rev.groupby("idx", squeeze=False)

for gb in [fwd, rev]:
assert all([isinstance(elem, slice) for elem in gb._group_indices])

with xr.set_options(use_flox=use_flox):
assert_identical(fwd.sum(), array)
assert_identical(rev.sum(), array_rev)


class TestDataArrayResample:
@pytest.mark.parametrize("use_cftime", [True, False])
Expand Down Expand Up @@ -1828,9 +1845,13 @@ def resample_as_pandas(array, *args, **kwargs):
expected = resample_as_pandas(array, "24h", closed="right")
assert_identical(expected, actual)

with pytest.raises(ValueError, match=r"index must be monotonic"):
with pytest.raises(ValueError, match=r"Index must be monotonic"):
array[[2, 0, 1]].resample(time="1D")

reverse = array.isel(time=slice(-1, None, -1))
with pytest.raises(ValueError):
reverse.resample(time="1D").mean()

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_doctest(self, use_cftime: bool) -> None:
# run the doctest example here so we are not surprised
Expand Down

0 comments on commit 0023e5d

Please sign in to comment.