diff --git a/xarray/core/_aggregations.py b/xarray/core/_aggregations.py index adc064840de..13e3716e869 100644 --- a/xarray/core/_aggregations.py +++ b/xarray/core/_aggregations.py @@ -4,7 +4,7 @@ from __future__ import annotations -from collections.abc import Callable, Sequence +from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any from xarray.core import duck_array_ops @@ -13,6 +13,7 @@ from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: + from xarray.core.coordinates import DatasetCoordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -104,13 +105,14 @@ def count( Data variables: foo int64 8B 5 """ - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -187,13 +189,14 @@ def all( Data variables: foo bool 1B False """ - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -270,13 +273,14 @@ def any( Data variables: foo bool 1B True """ - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -363,7 +367,7 @@ def max( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -371,6 +375,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -457,7 +462,7 @@ def min( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -465,6 +470,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -555,7 +561,7 @@ def mean( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, 
skipna=skipna, @@ -563,6 +569,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -669,7 +676,7 @@ def prod( Data variables: foo float64 8B 0.0 """ - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -678,6 +685,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -784,7 +792,7 @@ def sum( Data variables: foo float64 8B 8.0 """ - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -793,6 +801,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -896,7 +905,7 @@ def std( Data variables: foo float64 8B 1.14 """ - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -905,6 +914,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -1008,7 +1018,7 @@ def var( Data variables: foo float64 8B 1.3 """ - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -1017,6 +1027,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -1107,7 +1118,7 @@ def median( Data variables: foo float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -1115,6 +1126,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -1198,7 +1210,9 @@ def cumsum( Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) Group: / Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Self: raise NotImplementedError() + @property + def coords(self) -> DatasetCoordinates: + raise NotImplementedError() + + def assign_coords( + self, + coords: Mapping | None = None, + **coords_kwargs: Any, + ) -> Self: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -1406,13 +1439,14 @@ def count( Data variables: da int64 8B 5 """ - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -1478,13 +1512,14 @@ def all( Data variables: da bool 1B False """ - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -1550,13 +1585,14 @@ def any( Data variables: da bool 1B True """ - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -1636,7 +1672,7 @@ def max( Data variables: da float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -1644,6 +1680,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -1723,7 +1760,7 @@ def min( Data variables: da float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -1731,6 +1768,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -1810,7 +1848,7 @@ def mean( Data variables: da float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -1818,6 +1856,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -1916,7 +1955,7 @@ def prod( Data variables: da float64 8B 0.0 """ - 
return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -1925,6 +1964,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -2023,7 +2063,7 @@ def sum( Data variables: da float64 8B 8.0 """ - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -2032,6 +2072,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -2127,7 +2168,7 @@ def std( Data variables: da float64 8B 1.14 """ - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -2136,6 +2177,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -2231,7 +2273,7 @@ def var( Data variables: da float64 8B 1.3 """ - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -2240,6 +2282,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -2323,7 +2366,7 @@ def median( Data variables: da float64 8B nan """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -2331,6 +2374,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -2406,22 +2450,26 @@ def cumsum( da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan >>> ds.cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Size: 8B array(5) """ - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -2669,12 +2724,13 @@ def all( Size: 1B array(False) """ - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -2735,12 +2791,13 @@ def any( Size: 1B array(True) """ - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -2813,13 +2870,14 @@ def max( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -2892,13 +2950,14 @@ def min( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -2971,13 +3030,14 @@ def mean( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -3067,7 +3127,7 @@ def prod( Size: 8B array(0.) """ - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -3075,6 +3135,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -3164,7 +3225,7 @@ def sum( Size: 8B array(8.) 
""" - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -3172,6 +3233,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -3258,7 +3320,7 @@ def std( Size: 8B array(1.14017543) """ - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -3266,6 +3328,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -3352,7 +3415,7 @@ def var( Size: 8B array(1.3) """ - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -3360,6 +3423,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -3436,13 +3500,14 @@ def median( Size: 8B array(nan) """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -3530,13 +3595,14 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Dataset: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -3743,13 +3821,14 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -3839,13 +3918,14 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -3935,13 +4015,14 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -4048,7 +4129,7 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( 
duck_array_ops.max, dim=dim, skipna=skipna, @@ -4056,6 +4137,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -4162,7 +4244,7 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -4170,6 +4252,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -4276,7 +4359,7 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -4284,6 +4367,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -4410,7 +4494,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -4419,6 +4503,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -4545,7 +4630,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -4554,6 +4639,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -4677,7 +4763,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -4686,6 +4772,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -4809,7 +4896,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -4818,6 +4905,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -4911,7 +4999,7 @@ def median( Data variables: da (labels) float64 24B nan 2.0 1.5 """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -4919,6 +5007,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -5000,29 +5089,50 @@ def cumsum( da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan >>> ds.groupby("labels").cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without 
coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) Dataset: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> Dataset: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -5239,13 +5365,14 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -5335,13 +5462,14 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -5431,13 +5559,14 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -5544,7 +5673,7 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, @@ -5552,6 +5681,7 @@ def max( keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -5658,7 +5788,7 @@ def min( **kwargs, ) else: - return 
self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, @@ -5666,6 +5796,7 @@ def min( keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -5772,7 +5903,7 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, @@ -5780,6 +5911,7 @@ def mean( keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -5906,7 +6038,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -5915,6 +6047,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -6041,7 +6174,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -6050,6 +6183,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -6173,7 +6307,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -6182,6 +6316,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -6305,7 +6440,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -6314,6 +6449,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -6407,7 +6543,7 @@ def median( Data variables: da (time) float64 24B 1.0 2.0 nan """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, @@ -6415,6 +6551,7 @@ def median( keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -6496,29 +6633,50 @@ def cumsum( da (time) float64 48B 1.0 2.0 3.0 0.0 2.0 nan >>> ds.resample(time="3ME").cumsum() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3ME").cumsum(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod() - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3ME").cumprod(skipna=False) - Size: 48B + Size: 120B Dimensions: (time: 6) - Dimensions without coordinates: time + Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) DataArray: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -6729,12 +6903,13 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -6818,12 +6993,13 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -6907,12 +7083,13 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -7011,13 +7188,14 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -7116,13 +7294,14 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -7221,13 +7400,14 @@ def 
mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -7344,7 +7524,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -7352,6 +7532,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -7468,7 +7649,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -7476,6 +7657,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -7589,7 +7771,7 @@ def std( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -7597,6 +7779,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -7710,7 +7893,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -7718,6 +7901,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -7804,13 +7988,14 @@ def median( Coordinates: * labels (labels) object 24B 'a' 'b' 'c' """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -7904,13 +8089,29 @@ def cumsum( * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 
2001-06-30 labels (time) DataArray: raise NotImplementedError() + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> DataArray: + raise NotImplementedError() + def count( self, dim: Dims = None, @@ -8117,12 +8330,13 @@ def count( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.count, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def all( self, @@ -8206,12 +8420,13 @@ def all( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_all, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def any( self, @@ -8295,12 +8510,13 @@ def any( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.array_any, dim=dim, keep_attrs=keep_attrs, **kwargs, ) + return out def max( self, @@ -8399,13 +8615,14 @@ def max( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def min( self, @@ -8504,13 +8721,14 @@ def min( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def mean( self, @@ -8609,13 +8827,14 @@ def mean( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def prod( self, @@ -8732,7 +8951,7 @@ def prod( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, @@ -8740,6 +8959,7 @@ def prod( keep_attrs=keep_attrs, **kwargs, ) + return out def sum( self, @@ -8856,7 +9076,7 @@ def sum( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, @@ -8864,6 +9084,7 @@ def sum( keep_attrs=keep_attrs, **kwargs, ) + return out def std( self, @@ -8977,7 +9198,7 @@ def std( **kwargs, ) else: - return 
self.reduce( + out = self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, @@ -8985,6 +9206,7 @@ def std( keep_attrs=keep_attrs, **kwargs, ) + return out def var( self, @@ -9098,7 +9320,7 @@ def var( **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, @@ -9106,6 +9328,7 @@ def var( keep_attrs=keep_attrs, **kwargs, ) + return out def median( self, @@ -9192,13 +9415,14 @@ def median( Coordinates: * time (time) datetime64[ns] 24B 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( + out = self.reduce( duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, **kwargs, ) + return out def cumsum( self, @@ -9280,8 +9504,8 @@ def cumsum( Size: 48B array([1., 2., 5., 5., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([ 1., 2., 5., 5., 2., nan]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([1., 2., 6., 0., 2., 2.]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) Size: 48B array([ 1., 2., 6., 0., 2., nan]) Coordinates: + * time (time) datetime64[ns] 48B 2001-01-31 2001-02-28 ... 2001-06-30 labels (time) tuple[Hashable, ...]: + parsed_dim: tuple[Hashable, ...] + if isinstance(dim, str): + parsed_dim = (dim,) + elif dim is None: + parsed_dim_list = list() + # preserve order + for dim_ in itertools.chain( + *(grouper.codes.dims for grouper in self.groupers) + ): + if dim_ not in parsed_dim_list: + parsed_dim_list.append(dim_) + parsed_dim = tuple(parsed_dim_list) + elif dim is ...: + parsed_dim = tuple(self._original_obj.dims) + else: + parsed_dim = tuple(dim) + + # Do this so we raise the same error message whether flox is present or not. + # Better to control it here than in flox. 
+ for grouper in self.groupers: + if any( + d not in grouper.codes.dims and d not in self._original_obj.dims + for d in parsed_dim + ): + # TODO: Not a helpful error, it's a sanity check that dim actually exists + either in self.groupers or self._original_obj + raise ValueError(f"cannot reduce over dimensions {dim}.") + + return parsed_dim + def _flox_reduce( self, dim: Dims, @@ -1088,30 +1120,7 @@ def _flox_reduce( # set explicitly to avoid unnecessarily accumulating count kwargs["min_count"] = 0 - parsed_dim: tuple[Hashable, ...] - if isinstance(dim, str): - parsed_dim = (dim,) - elif dim is None: - parsed_dim_list = list() - # preserve order - for dim_ in itertools.chain( - *(grouper.codes.dims for grouper in self.groupers) - ): - if dim_ not in parsed_dim_list: - parsed_dim_list.append(dim_) - parsed_dim = tuple(parsed_dim_list) - elif dim is ...: - parsed_dim = tuple(obj.dims) - else: - parsed_dim = tuple(dim) - - # Do this so we raise the same error message whether flox is present or not. - # Better to control it here than in flox. 
- for grouper in self.groupers: - if any( - d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim - ): - raise ValueError(f"cannot reduce over dimensions {dim}.") + parsed_dim = self._parse_dim(dim) has_missing_groups = ( self.encoded.unique_coord.size != self.encoded.full_index.size @@ -1202,6 +1211,50 @@ def _flox_reduce( return result + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, + ) -> T_Xarray: + from flox import groupby_scan + + parsed_dim = self._parse_dim(dim) + obj = self._original_obj.transpose(..., *parsed_dim) + axis = range(-len(parsed_dim), 0) + codes = tuple(g.codes for g in self.groupers) + + def wrapper(array, *by, func: str, skipna: bool | None, **kwargs): + if skipna or (skipna is None and array.dtype.kind in "cfO"): + if "nan" not in func: + func = f"nan{func}" + + return groupby_scan(array, *codes, func=func, **kwargs) + + actual = apply_ufunc( + wrapper, + obj, + *codes, + dask="allowed", + keep_attrs=( + _get_keep_attrs(default=True) if keep_attrs is None else keep_attrs + ), + kwargs=dict( + func=func, + skipna=skipna, + expected_groups=None, # TODO: Should be same as _flox_reduce? + axis=axis, + dtype=kwargs.get("dtype"), + method=kwargs.get("method"), + engine=kwargs.get("engine"), + ), + ) + + return actual + def fillna(self, value: Any) -> T_Xarray: """Fill missing values in this object by group. 
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6dce32aeb5c..3d5aeafa9af 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6073,16 +6073,16 @@ def test_reduce_cumsum(self) -> None: assert_identical(expected, data.cumsum()) @pytest.mark.parametrize( - "reduct, expected", + "reduct", [ - ("dim1", ["dim2", "dim3", "time", "dim1"]), - ("dim2", ["dim3", "time", "dim1", "dim2"]), - ("dim3", ["dim2", "time", "dim1", "dim3"]), - ("time", ["dim2", "dim3", "dim1"]), + ("dim1",), + ("dim2",), + ("dim3",), + ("time",), ], ) @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) - def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: + def test_reduce_cumsum_test_dims(self, reduct, func) -> None: data = create_test_data() with pytest.raises( ValueError, @@ -6090,9 +6090,10 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: ): getattr(data, func)(dim="bad_dim") - # ensure dimensions are correct + # ensure dimensions are retained: actual = getattr(data, func)(dim=reduct).dims - assert list(actual) == expected + expected = data.dims + assert set(actual) == set(expected) def test_reduce_non_numeric(self) -> None: data1 = create_test_data(seed=44, use_extension_array=True) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 47ea2fcd2b0..9253c733f66 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -2551,54 +2551,86 @@ def func(arg1, arg2, arg3=0.0): assert_identical(expected, actual) -def test_groupby_cumsum() -> None: - ds = xr.Dataset( - {"foo": (("x",), [7, 3, 1, 1, 1, 1, 1])}, - coords={"x": [0, 1, 2, 3, 4, 5, 6], "group_id": ("x", [0, 0, 1, 1, 2, 2, 2])}, - ) - actual = ds.groupby("group_id").cumsum(dim="x") - expected = xr.Dataset( - { - "foo": (("x",), [7, 10, 1, 2, 1, 2, 3]), - }, - coords={ - "x": [0, 1, 2, 3, 4, 5, 6], - "group_id": ds.group_id, - }, - ) - # TODO: Remove drop_vars when GH6528 is fixed - # when 
Dataset.cumsum propagates indexes, and the group variable? - assert_identical(expected.drop_vars(["x", "group_id"]), actual) - - actual = ds.foo.groupby("group_id").cumsum(dim="x") - expected.coords["group_id"] = ds.group_id - expected.coords["x"] = np.arange(7) - assert_identical(expected.foo, actual) - +@pytest.mark.parametrize( + "method, expected_array, use_flox, use_dask", + [ + ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], True, True), + ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], True, False), + ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], False, True), + ("cumsum", [7.0, 9.0, 0.0, 1.0, 2.0, 2.0], False, False), + pytest.param( + "cumprod", + [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], + True, + True, + marks=pytest.mark.skip( + reason="TODO: Groupby with cumprod is currently not supported with flox" + ), + ), + pytest.param( + "cumprod", + [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], + True, + False, + marks=pytest.mark.skip( + reason="TODO: Groupby with cumprod is currently not supported with flox" + ), + ), + ("cumprod", [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], False, True), + ("cumprod", [7.0, 14.0, 0.0, 0.0, 2.0, 2.0], False, False), + ], +) +def test_groupby_scans( + method: Literal["cumsum", "cumprod"], + expected_array: list[float], + use_flox: bool, + use_dask: bool, + use_lazy_group_idx: bool = False, +) -> None: + if use_dask and not has_dask: + pytest.skip("requires dask") -def test_groupby_cumprod() -> None: + # Test Dataset groupby: ds = xr.Dataset( - {"foo": (("x",), [7, 3, 0, 1, 1, 2, 1])}, - coords={"x": [0, 1, 2, 3, 4, 5, 6], "group_id": ("x", [0, 0, 1, 1, 2, 2, 2])}, + {"foo": (("x",), [7, 2, 0, 1, 2, np.nan])}, + coords={"x": [0, 1, 2, 3, 4, 5], "group_idx": ("x", [0, 0, 1, 1, 2, 2])}, ) - actual = ds.groupby("group_id").cumprod(dim="x") + with xr.set_options(use_flox=use_flox): + if use_dask: + ds = ds.chunk() + if use_lazy_group_idx: + grouper = xr.groupers.UniqueGrouper(labels=[0, 1, 2]) + actual = getattr(ds.groupby(group_idx=grouper), method)(dim="x") + else: + grouper 
= ds.group_idx.compute() + actual = getattr(ds.groupby(grouper), method)(dim="x") + else: + actual = getattr(ds.groupby("group_idx"), method)(dim="x") + expected = xr.Dataset( { - "foo": (("x",), [7, 21, 0, 0, 1, 2, 2]), + "foo": (("x",), expected_array), }, coords={ - "x": [0, 1, 2, 3, 4, 5, 6], - "group_id": ds.group_id, + "x": ds.x, + "group_idx": ds.group_idx, }, ) - # TODO: Remove drop_vars when GH6528 is fixed - # when Dataset.cumsum propagates indexes, and the group variable? - assert_identical(expected.drop_vars(["x", "group_id"]), actual) + assert_identical(expected, actual.compute()) - actual = ds.foo.groupby("group_id").cumprod(dim="x") - expected.coords["group_id"] = ds.group_id - expected.coords["x"] = np.arange(7) - assert_identical(expected.foo, actual) + # Test DataArray groupby: + with xr.set_options(use_flox=use_flox): + if use_dask: + ds = ds.chunk() + if use_lazy_group_idx: + grouper = xr.groupers.UniqueGrouper(labels=[0, 1, 2]) + actual = getattr(ds.foo.groupby(group_idx=grouper), method)(dim="x") + else: + grouper = ds.group_idx.compute() + actual = getattr(ds.foo.groupby(grouper), method)(dim="x") + else: + actual = getattr(ds.foo.groupby("group_idx"), method)(dim="x") + assert_identical(expected.foo.compute(), actual.compute()) @pytest.mark.parametrize( @@ -2608,7 +2640,7 @@ def test_groupby_cumprod() -> None: ("cumprod", [1.0, 2.0, 6.0, 6.0, 2.0, 2.0]), ], ) -def test_resample_cumsum(method: str, expected_array: list[float]) -> None: +def test_resample_scans(method: str, expected_array: list[float]) -> None: ds = xr.Dataset( {"foo": ("time", [1, 2, 3, 1, 2, np.nan])}, coords={ @@ -2622,13 +2654,11 @@ def test_resample_cumsum(method: str, expected_array: list[float]) -> None: "time": xr.date_range("01-01-2001", freq="ME", periods=6, use_cftime=False), }, ) - # TODO: Remove drop_vars when GH6528 is fixed - # when Dataset.cumsum propagates indexes, and the group variable? 
- assert_identical(expected.drop_vars(["time"]), actual) + assert_identical(expected, actual) actual = getattr(ds.foo.resample(time="3ME"), method)(dim="time") expected.coords["time"] = ds.time - assert_identical(expected.drop_vars(["time"]).foo, actual) + assert_identical(expected.foo, actual) def test_groupby_binary_op_regression() -> None: diff --git a/xarray/util/generate_aggregations.py b/xarray/util/generate_aggregations.py index e386b96f63d..545850fcc46 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -4,8 +4,10 @@ Usage: python xarray/util/generate_aggregations.py - pytest --doctest-modules xarray/{core,namedarray}/_aggregations.py --accept || true - pytest --doctest-modules xarray/{core,namedarray}/_aggregations.py + pytest --doctest-modules xarray/core/_aggregations.py --accept + pytest --doctest-modules xarray/core/_aggregations.py + pytest --doctest-modules xarray/namedarray/_aggregations.py --accept + pytest --doctest-modules xarray/namedarray/_aggregations.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). 
The second run of pytest is deliberate, since the first will return an error @@ -15,7 +17,7 @@ import textwrap from dataclasses import dataclass, field -from typing import NamedTuple +from typing import Literal, NamedTuple MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -24,7 +26,7 @@ from __future__ import annotations -from collections.abc import Callable, Sequence +from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any from xarray.core import duck_array_ops @@ -33,6 +35,7 @@ from xarray.core.utils import contains_only_chunked_or_numpy, module_available if TYPE_CHECKING: + from xarray.core.coordinates import DatasetCoordinates from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset @@ -69,6 +72,34 @@ def reduce( ) -> Self: raise NotImplementedError()""" +DATASET_PREAMBLE = """ + +class {obj}{cls}Aggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keep_attrs: bool | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError() + + @property + def coords(self) -> DatasetCoordinates: + raise NotImplementedError() + + def assign_coords( + self, + coords: Mapping | None = None, + **coords_kwargs: Any, + ) -> Self: + raise NotImplementedError()""" + NAMED_ARRAY_AGGREGATIONS_PREAMBLE = """ class {obj}{cls}Aggregations: @@ -107,6 +138,17 @@ def _flox_reduce( self, dim: Dims, **kwargs: Any, + ) -> {obj}: + raise NotImplementedError() + + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, + **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -131,6 +173,17 @@ def _flox_reduce( self, dim: Dims, **kwargs: Any, + ) -> {obj}: + raise NotImplementedError() + + def _flox_scan( + self, + dim: Dims, + *, + func: str, + skipna: bool | None = None, + keep_attrs: bool | None = None, 
+ **kwargs: Any, ) -> {obj}: raise NotImplementedError()""" @@ -284,6 +337,7 @@ def __init__( see_also_methods=(), min_flox_version=None, additional_notes="", + aggregation_type: Literal["reduce", "scan"] = "reduce", ): self.name = name self.extra_kwargs = extra_kwargs @@ -292,6 +346,7 @@ def __init__( self.see_also_methods = see_also_methods self.min_flox_version = min_flox_version self.additional_notes = additional_notes + self.aggregation_type = aggregation_type if bool_reduce: self.array_method = f"array_{name}" self.np_example_array = ( @@ -444,7 +499,7 @@ def generate_code(self, method, has_keep_attrs): # median isn't enabled yet, because it would break if a single group was present in multiple # chunks. The non-flox code path will just rechunk every group to a single chunk and execute the median - method_is_not_flox_supported = method.name in ("median", "cumsum", "cumprod") + method_is_not_flox_supported = method.name in ("median", "cumprod") if method_is_not_flox_supported: indent = 12 else: @@ -455,14 +510,21 @@ def generate_code(self, method, has_keep_attrs): else: extra_kwargs = "" + if method.aggregation_type == "scan": + # Scans retain dimensions. 
+ out_finalized = "out.assign_coords(self._obj.coords)" + else: + out_finalized = "out" + if method_is_not_flox_supported: return f"""\ - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, **kwargs, - )""" + ) + return {out_finalized}""" min_version_check = f""" and module_available("flox", minversion="{method.min_flox_version}")""" @@ -476,7 +538,7 @@ def generate_code(self, method, has_keep_attrs): + f""" and contains_only_chunked_or_numpy(self._obj) ): - return self._flox_reduce( + return self._flox_{method.aggregation_type}( func="{method.name}", dim=dim,{extra_kwargs} # fill_value=fill_value, @@ -484,12 +546,13 @@ def generate_code(self, method, has_keep_attrs): **kwargs, ) else: - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, **kwargs, - )""" + ) + return {out_finalized}""" ) @@ -507,12 +570,20 @@ def generate_code(self, method, has_keep_attrs): keep_attrs = ( "\n" + 12 * " " + "keep_attrs=keep_attrs," if has_keep_attrs else "" ) + + if method.aggregation_type == "scan" and self.datastructure.name == "Dataset": + # Scans retain dimensions, datasets drops them somehow: + out_finalized = "out.assign_coords(self.coords)" + else: + out_finalized = "out" + return f"""\ - return self.reduce( + out = self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs}{keep_attrs} **kwargs, - )""" + ) + return {out_finalized}""" AGGREGATION_METHODS = ( @@ -530,13 +601,15 @@ def generate_code(self, method, has_keep_attrs): Method( "median", extra_kwargs=(skipna,), numeric_only=True, min_flox_version="0.9.2" ), - # Cumulatives: + # Scans: Method( "cumsum", extra_kwargs=(skipna,), numeric_only=True, see_also_methods=("cumulative",), additional_notes=_CUM_NOTES, + min_flox_version="0.10.5", + aggregation_type="scan", ), Method( "cumprod", @@ -544,6 +617,7 @@ def generate_code(self, method, has_keep_attrs): 
numeric_only=True, see_also_methods=("cumulative",), additional_notes=_CUM_NOTES, + aggregation_type="scan", ), ) @@ -611,7 +685,7 @@ def generate_code(self, method, has_keep_attrs): docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - definition_preamble=AGGREGATIONS_PREAMBLE, + definition_preamble=DATASET_PREAMBLE, ) DATAARRAY_GENERATOR = GenericAggregationGenerator( cls="", @@ -700,8 +774,11 @@ def write_methods(filepath, generators, preamble): from pathlib import Path p = Path(os.getcwd()) + + filepath = p.parent / "xarray" / "xarray" / "core" / "_aggregations.py" + # filepath = p.parent / "core" / "_aggregations.py" # Run from script location write_methods( - filepath=p.parent / "xarray" / "xarray" / "core" / "_aggregations.py", + filepath=filepath, generators=[ DATATREE_GENERATOR, DATASET_GENERATOR, @@ -713,9 +790,12 @@ def write_methods(filepath, generators, preamble): ], preamble=MODULE_PREAMBLE, ) + + # NamedArray: + filepath = p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py" + # filepath = p.parent / "namedarray" / "_aggregations.py" # Run from script location write_methods( - filepath=p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py", + filepath=filepath, generators=[NAMED_ARRAY_GENERATOR], preamble=NAMED_ARRAY_MODULE_PREAMBLE, ) - # filepath = p.parent / "core" / "_aggregations.py" # Run from script location