Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ Bug fixes
- Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray``
in NumPy 2.0 (:issue:`9312`, :pull:`9393`)
By `Andrew Scherer <https://github.com/andrew-s28>`_.
- Fix support for using ``pandas.BaseOffset``, ``pandas.Timedelta``, and
``datetime.timedelta`` objects as ``resample`` frequencies
(:issue:`9408`, :pull:`9413`).
By `Oliver Higgs <https://github.com/oliverhiggs>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
37 changes: 36 additions & 1 deletion xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,11 +772,18 @@ def _emit_freq_deprecation_warning(deprecated_freq):
emit_user_level_warning(message, FutureWarning)


def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffset:
def to_offset(
freq: BaseCFTimeOffset | str | timedelta | pd.Timedelta | pd.DateOffset,
warn: bool = True,
) -> BaseCFTimeOffset:
"""Convert a frequency string to the appropriate subclass of
BaseCFTimeOffset."""
if isinstance(freq, BaseCFTimeOffset):
return freq
if isinstance(freq, timedelta | pd.Timedelta):
return delta_to_tick(freq)
if isinstance(freq, pd.DateOffset):
freq = freq.freqstr

match = re.match(_PATTERN, freq)
if match is None:
Expand All @@ -791,6 +798,34 @@ def to_offset(freq: BaseCFTimeOffset | str, warn: bool = True) -> BaseCFTimeOffs
return _FREQUENCIES[freq](n=multiples)


def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick:
    """Convert a timedelta into an equivalent fixed-duration offset.

    Adapted from pandas.tslib.delta_to_tick.

    Parameters
    ----------
    delta : datetime.timedelta or pandas.Timedelta
        Duration to convert.

    Returns
    -------
    Tick
        One of ``Day``, ``Hour``, ``Minute``, ``Second``, ``Millisecond`` or
        ``Microsecond``; the coarsest unit that divides ``delta`` exactly,
        with ``n`` covering the whole duration.

    Raises
    ------
    ValueError
        If ``delta`` is a ``pandas.Timedelta`` with a non-zero nanoseconds
        component.
    """
    if isinstance(delta, pd.Timedelta) and delta.nanoseconds != 0:
        # pandas.Timedelta has nanoseconds, but these are not supported
        raise ValueError(
            "Unable to convert 'pandas.Timedelta' object with non-zero "
            "nanoseconds to 'CFTimeOffset' object"
        )

    # ``days``, ``seconds`` and ``microseconds`` are separate components of
    # the duration, so the whole-second part has to be rebuilt from the
    # first two.
    total_seconds = delta.days * 86400 + delta.seconds

    if delta.microseconds == 0:
        if delta.seconds == 0:
            return Day(n=delta.days)
        if total_seconds % 3600 == 0:
            return Hour(n=total_seconds // 3600)
        if total_seconds % 60 == 0:
            return Minute(n=total_seconds // 60)
        return Second(n=total_seconds)

    # A non-zero sub-second part always yields a Millisecond or Microsecond
    # offset, but the days and seconds must still be folded into the count;
    # using ``delta.microseconds`` alone would silently drop them (and would
    # turn small negative deltas into large positive ones).
    total_microseconds = total_seconds * 1_000_000 + delta.microseconds
    if total_microseconds % 1_000 == 0:
        return Millisecond(n=total_microseconds // 1_000)
    return Microsecond(n=total_microseconds)


def to_cftime_datetime(date_str_or_date, calendar=None):
if cftime is None:
raise ModuleNotFoundError("No module named 'cftime'")
Expand Down
16 changes: 10 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime
import warnings
from collections.abc import Callable, Hashable, Iterable, Iterator, Mapping
from contextlib import suppress
Expand All @@ -13,6 +14,7 @@
from xarray.core import dtypes, duck_array_ops, formatting, formatting_html, ops
from xarray.core.indexing import BasicIndexer, ExplicitlyIndexed
from xarray.core.options import OPTIONS, _get_keep_attrs
from xarray.core.types import ResampleCompatible
from xarray.core.utils import (
Frozen,
either_dict_or_kwargs,
Expand All @@ -32,8 +34,6 @@


if TYPE_CHECKING:
import datetime

from numpy.typing import DTypeLike

from xarray.core.dataarray import DataArray
Expand Down Expand Up @@ -891,14 +891,14 @@ def rolling_exp(
def _resample(
self,
resample_cls: type[T_Resample],
indexer: Mapping[Hashable, str | Resampler] | None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None,
skipna: bool | None,
closed: SideOptions | None,
label: SideOptions | None,
offset: pd.Timedelta | datetime.timedelta | str | None,
origin: str | DatetimeLike,
restore_coord_dims: bool | None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> T_Resample:
"""Returns a Resample object for performing resampling operations.

Expand Down Expand Up @@ -1078,14 +1078,18 @@ def _resample(
)

grouper: Resampler
if isinstance(freq, str):
if isinstance(freq, ResampleCompatible):
grouper = TimeResampler(
freq=freq, closed=closed, label=label, origin=origin, offset=offset
)
elif isinstance(freq, Resampler):
grouper = freq
else:
raise ValueError("freq must be a str or a Resampler object")
raise ValueError(
"freq must be an object of type 'str', 'datetime.timedelta', "
"'pandas.Timedelta', 'pandas.DateOffset', or 'TimeResampler'. "
f"Received {type(freq)} instead."
)

rgrouper = ResolvedGrouper(grouper, group, self)

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
Self,
SideOptions,
T_ChunkDimFreq,
Expand Down Expand Up @@ -7244,15 +7245,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Hashable, str | Resampler] | None = None,
indexer: Mapping[Hashable, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DataArrayResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -7263,7 +7264,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -7287,7 +7288,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
9 changes: 5 additions & 4 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@
QueryEngineOptions,
QueryParserOptions,
ReindexMethodOptions,
ResampleCompatible,
SideOptions,
T_ChunkDimFreq,
T_DatasetPadConstantValues,
Expand Down Expand Up @@ -10685,15 +10686,15 @@ def coarsen(
@_deprecate_positional_args("v2024.07.0")
def resample(
self,
indexer: Mapping[Any, str | Resampler] | None = None,
indexer: Mapping[Any, ResampleCompatible | Resampler] | None = None,
*,
skipna: bool | None = None,
closed: SideOptions | None = None,
label: SideOptions | None = None,
offset: pd.Timedelta | datetime.timedelta | str | None = None,
origin: str | DatetimeLike = "start_day",
restore_coord_dims: bool | None = None,
**indexer_kwargs: str | Resampler,
**indexer_kwargs: ResampleCompatible | Resampler,
) -> DatasetResample:
"""Returns a Resample object for performing resampling operations.

Expand All @@ -10704,7 +10705,7 @@ def resample(

Parameters
----------
indexer : Mapping of Hashable to str, optional
indexer : Mapping of Hashable to str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler, optional
Mapping from the dimension name to resample frequency [1]_. The
dimension must be datetime-like.
skipna : bool, optional
Expand All @@ -10728,7 +10729,7 @@ def resample(
restore_coord_dims : bool, optional
If True, also restore the dimension order of multi-dimensional
coordinates.
**indexer_kwargs : str
**indexer_kwargs : str, datetime.timedelta, pd.Timedelta, pd.DateOffset, or Resampler
The keyword arguments form of ``indexer``.
One of indexer or indexer_kwargs must be provided.

Expand Down
4 changes: 2 additions & 2 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from xarray.core.types import SideOptions

if typing.TYPE_CHECKING:
from xarray.core.types import CFTimeDatetime
from xarray.core.types import CFTimeDatetime, ResampleCompatible


class CFTimeGrouper:
Expand All @@ -75,7 +75,7 @@ class CFTimeGrouper:

def __init__(
self,
freq: str | BaseCFTimeOffset,
freq: ResampleCompatible | BaseCFTimeOffset,
closed: SideOptions | None = None,
label: SideOptions | None = None,
origin: str | CFTimeDatetime = "start_day",
Expand Down
2 changes: 2 additions & 0 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,5 @@ def copy(
Bins = Union[
int, Sequence[int], Sequence[float], Sequence[pd.Timestamp], np.ndarray, pd.Index
]

ResampleCompatible: TypeAlias = str | datetime.timedelta | pd.Timedelta | pd.DateOffset
20 changes: 16 additions & 4 deletions xarray/groupers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@
import numpy as np
import pandas as pd

from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.coding.cftime_offsets import BaseCFTimeOffset, _new_to_legacy_freq
from xarray.core import duck_array_ops
from xarray.core.coordinates import Coordinates
from xarray.core.dataarray import DataArray
from xarray.core.groupby import T_Group, _DummyGroup
from xarray.core.indexes import safe_cast_to_index
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.core.types import Bins, DatetimeLike, GroupIndices, SideOptions
from xarray.core.types import (
Bins,
DatetimeLike,
GroupIndices,
ResampleCompatible,
SideOptions,
)
from xarray.core.variable import Variable

__all__ = [
Expand Down Expand Up @@ -336,7 +342,7 @@ class TimeResampler(Resampler):

Attributes
----------
freq : str
freq : str, datetime.timedelta, pandas.Timedelta, or pandas.DateOffset
Frequency to resample to. See `Pandas frequency
aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
for a list of possible values.
Expand All @@ -358,7 +364,7 @@ class TimeResampler(Resampler):
An offset timedelta added to the origin.
"""

freq: str
freq: ResampleCompatible
closed: SideOptions | None = field(default=None)
label: SideOptions | None = field(default=None)
origin: str | DatetimeLike = field(default="start_day")
Expand Down Expand Up @@ -388,6 +394,12 @@ def _init_properties(self, group: T_Group) -> None:
offset=offset,
)
else:
if isinstance(self.freq, BaseCFTimeOffset):
raise ValueError(
"'BaseCFTimeOffset' resample frequencies are only supported "
"when resampling a 'CFTimeIndex'"
)

self.index_grouper = pd.Grouper(
# TODO remove once requiring pandas >= 2.2
freq=_new_to_legacy_freq(self.freq),
Expand Down
49 changes: 48 additions & 1 deletion xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import datetime
import operator
import warnings
from unittest import mock
Expand Down Expand Up @@ -757,7 +758,6 @@ def test_groupby_none_group_name() -> None:


def test_groupby_getitem(dataset) -> None:

assert_identical(dataset.sel(x=["a"]), dataset.groupby("x")["a"])
assert_identical(dataset.sel(z=[1]), dataset.groupby("z")[1])
assert_identical(dataset.foo.sel(x=["a"]), dataset.foo.groupby("x")["a"])
Expand Down Expand Up @@ -1813,6 +1813,30 @@ def resample_as_pandas(array, *args, **kwargs):
with pytest.raises(ValueError):
reverse.resample(time="1D").mean()

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_dtype(self, use_cftime: bool) -> None:
    """Smoke test: ``DataArray.resample`` accepts each frequency type.

    Calls ``resample`` with a string, a ``pandas.Timedelta``, a pandas
    offset and a ``datetime.timedelta``; the test passes as long as none of
    the calls raises.
    """
    if use_cftime and not has_cftime:
        pytest.skip()

    time_index = xr.date_range(
        "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime
    )
    array = DataArray(np.arange(10), [("time", time_index)])

    frequencies = (
        "10min",
        pd.Timedelta(hours=2),
        pd.offsets.MonthBegin(),
        datetime.timedelta(days=1, hours=6),
    )
    for frequency in frequencies:
        array.resample(time=frequency)

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_doctest(self, use_cftime: bool) -> None:
# run the doctest example here so we are not surprised
Expand Down Expand Up @@ -2232,6 +2256,29 @@ def test_resample_and_first(self) -> None:
result = actual.reduce(method)
assert_equal(expected, result)

@pytest.mark.parametrize("use_cftime", [True, False])
def test_resample_dtype(self, use_cftime: bool) -> None:
    """Smoke test: ``Dataset.resample`` accepts each frequency type.

    Calls ``resample`` with a string, a ``pandas.Timedelta``, a pandas
    offset and a ``datetime.timedelta``; the test passes as long as none of
    the calls raises.
    """
    if use_cftime and not has_cftime:
        pytest.skip()

    time_index = xr.date_range(
        "2000-01-01", freq="6h", periods=10, use_cftime=use_cftime
    )
    # Keep the two random draws in this order so the sequence of RNG calls
    # is unchanged.
    foo_values = np.random.randn(10, 5, 3)
    bar_values = np.random.randn(10)
    dataset = Dataset(
        {
            "foo": (["time", "x", "y"], foo_values),
            "bar": ("time", bar_values, {"meta": "data"}),
            "time": time_index,
        }
    )

    frequencies = (
        "10min",
        pd.Timedelta(hours=2),
        pd.offsets.MonthBegin(),
        datetime.timedelta(days=1, hours=6),
    )
    for frequency in frequencies:
        dataset.resample(time=frequency)

def test_resample_min_count(self) -> None:
times = pd.date_range("2000-01-01", freq="6h", periods=10)
ds = Dataset(
Expand Down