From c486df7720edf547e336112a376a7d444f2f8778 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Aug 2021 16:09:54 -0600 Subject: [PATCH 001/138] Move `_reduce_method` classmethod to `groupby.py` module --- xarray/core/arithmetic.py | 4 -- xarray/core/groupby.py | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 27ec5ab8dd9..814e9a59877 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -128,8 +128,6 @@ class DataArrayArithmetic( class DataArrayGroupbyArithmetic( - ImplementsArrayReduce, - IncludeReduceMethods, SupportsArithmetic, DataArrayGroupByOpsMixin, ): @@ -137,8 +135,6 @@ class DataArrayGroupbyArithmetic( class DatasetGroupbyArithmetic( - ImplementsDatasetReduce, - IncludeReduceMethods, SupportsArithmetic, DatasetGroupByOpsMixin, ): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1ca5de965d0..5847da99d4c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,8 @@ import datetime +import os import warnings +from textwrap import dedent +from typing import Callable import numpy as np import pandas as pd @@ -21,6 +24,11 @@ ) from .variable import IndexVariable, Variable, as_variable +XARRAY_NUMPY_GROUPIES = os.environ.get("XARRAY_NUMPY_GROUPIES", "False").lower() in ( + "true", + "1", +) + def check_reduce_dims(reduce_dims, dimensions): @@ -876,6 +884,43 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) + if not XARRAY_NUMPY_GROUPIES: + + @classmethod + def _reduce_method( + cls, func: Callable, include_skipna: bool, numeric_only: bool + ): + if include_skipna: + + def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + + else: + + def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] + return self.reduce(func, dim, axis, **kwargs) + + return wrapped_func + + _reduce_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension(s) over which to apply `{name}`. + axis : int or sequence of int, optional + Axis(es) over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + `{name}` is calculated over axes.""" + ) + + _cum_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension over which to apply `{name}`. + axis : int or sequence of int, optional + Axis over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied.""" + ) + class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic): @@ -944,6 +989,43 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined + if not XARRAY_NUMPY_GROUPIES: + + @classmethod + def _reduce_method( + cls, func: Callable, include_skipna: bool, numeric_only: bool + ): + if include_skipna: + + def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): + return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + + else: + + def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] + return self.reduce(func, dim, axis, **kwargs) + + return wrapped_func + + _reduce_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension(s) over which to apply `{name}`. + axis : int or sequence of int, optional + Axis(es) over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied. 
If neither are supplied, then + `{name}` is calculated over axes.""" + ) + + _cum_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension over which to apply `{name}`. + axis : int or sequence of int, optional + Axis over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied.""" + ) + def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). @@ -994,3 +1076,8 @@ def assign(self, **kwargs): Dataset.assign """ return self.map(lambda ds: ds.assign(**kwargs)) + + +if not XARRAY_NUMPY_GROUPIES: + ops.inject_reduce_methods(DataArrayGroupBy) + ops.inject_reduce_methods(DatasetGroupBy) From ef91e6e24eb13a2974268be2c7e0479dbd219e5e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Aug 2021 18:24:03 -0600 Subject: [PATCH 002/138] Add _numpy_groupies module --- xarray/core/_numpy_groupies.py | 118 ++++++++++++++++++++++++++++++ xarray/core/groupby.py | 126 ++++++++++++++++++++++++++++++++- 2 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 xarray/core/_numpy_groupies.py diff --git a/xarray/core/_numpy_groupies.py b/xarray/core/_numpy_groupies.py new file mode 100644 index 00000000000..41fb8a44346 --- /dev/null +++ b/xarray/core/_numpy_groupies.py @@ -0,0 +1,118 @@ +import itertools +import os +import typing + +import numpy as np + +XARRAY_NUMPY_GROUPIES = os.environ.get("XARRAY_NUMPY_GROUPIES", "False").lower() in ( + "true", + "1", +) + +if XARRAY_NUMPY_GROUPIES: + try: + import dask_groupby.aggregations + import dask_groupby.core + except ImportError: + raise ImportError( + "Using numpy_groupies with xarray requires the `dask-groupby` package " + "to be installed. To install, run `python -m pip install dask_groupby`." + ) + import dask + + def xarray_reduce( + obj, + *by, + func, + expected_groups=None, + bins=None, + dim=None, + split_out=1, + fill_value=None, + blockwise=False, + ): + """Reduce a DataArray or Dataset using dask_groupby.xarray.""" + + from .alignment import broadcast + from .computation import apply_ufunc + from .dataarray import DataArray + + by: typing.Tuple[DataArray] = tuple(obj[g] if isinstance(g, str) else g for g in by) # type: ignore + if len(by) > 1 and any(dask.is_dask_collection(by_) for by_ in by): + raise ValueError( + "Grouping by multiple variables will call compute dask variables." 
+ ) + + grouper_dims = set(itertools.chain(*tuple(g.dims for g in by))) + obj, *by = broadcast(obj, *by, exclude=set(obj.dims) - grouper_dims) + obj = obj.transpose(..., *by[0].dims) + + dim = by[0].dims if dim is None else dask_groupby.aggregations._atleast_1d(dim) + assert isinstance(obj, DataArray) + axis = tuple(obj.get_axis_num(d) for d in dim) + group_names = tuple(g.name for g in by) + if len(by) > 1: + ( + group_idx, + expected_groups, + group_shape, + _, + _, + _, + ) = dask_groupby.core.factorize_( + tuple(g.data for g in by), expected_groups, bins + ) + to_group = DataArray( + group_idx, dims=dim, coords={d: by[0][d] for d in by[0].indexes} + ) + else: + if expected_groups is None and isinstance(by[0].data, np.ndarray): + expected_groups = (np.unique(by[0].data),) + if expected_groups is None: + raise NotImplementedError( + "Please provided expected_groups if not grouping by a numpy-backed DataArray" + ) + group_shape = (len(expected_groups[0]),) + to_group = by[0] + + group_sizes = dict(zip(group_names, group_shape)) + indims = tuple(obj.dims) + otherdims = tuple(d for d in indims if d not in dim) + result_dims = otherdims + group_names + + def wrapper(*args, **kwargs): + result, groups = dask_groupby.core.groupby_reduce(*args, **kwargs) + if len(by) > 1: + # all groups need not be present. reindex here + # TODO: add test + reindexed = dask_groupby.core.reindex_( + result, + from_=groups, + to=np.arange(np.prod(group_shape)), + fill_value=fill_value, + axis=-1, + ) + result = reindexed.reshape(result.shape[:-1] + group_shape) + return result + + actual = apply_ufunc( + wrapper, + obj, + to_group, + input_core_dims=[indims, dim], + dask="allowed", + output_core_dims=[result_dims], + dask_gufunc_kwargs=dict(output_sizes=group_sizes), + kwargs={ + "func": func, + "axis": axis, + "split_out": split_out, + "fill_value": fill_value, + "blockwise": blockwise, + }, + ) + + for name, expect in zip(group_names, expected_groups): + actual[name] = expect + + return actual diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5847da99d4c..4c5e7ca27ec 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,6 +29,9 @@ "1", ) +if XARRAY_NUMPY_GROUPIES: + from ._numpy_groupies import xarray_reduce + def check_reduce_dims(reduce_dims, dimensions): @@ -393,6 +396,12 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) + # to_stack = [] + # for index, slicer in enumerate(group_indices): + # stop = obj.sizes[group_dim] if slicer.stop is None else slicer.stop + # to_stack.append(index * np.ones((stop - slicer.start,), dtype=np.int32)) + # by = DataArray(np.hstack(to_stack), dims=(group_dim,), name="__groupby_dim__") + # specification for the groupby operation self._obj = obj self._group = group @@ -403,6 +412,7 @@ def __init__( self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims + # self._by = by # cached attributes self._groups = None @@ -884,7 +894,63 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) - if not XARRAY_NUMPY_GROUPIES: + if XARRAY_NUMPY_GROUPIES: + + def sum(self): + return xarray_reduce( + self._obj, + self._group, + func="sum", + blockwise=True, + expected_groups=(self._unique_coord.data,), + ) + + def mean(self): + ... + + def std(self): + ... + + def nanstd(self): + ... + + def var(self): + ... + + def nanvar(self): + ... + + def max(self): + ... + + def nanmax(self): + ... + + def min(self): + ... + + def nanmin(self): + ... 
+ + def argmin(self): + ... + + def argmax(self): + ... + + def nanargmin(self): + ... + + def nanargmax(self): + ... + + def first(self): + ... + + def last(self): + ... + + else: @classmethod def _reduce_method( @@ -989,7 +1055,63 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - if not XARRAY_NUMPY_GROUPIES: + if XARRAY_NUMPY_GROUPIES: + + def sum(self): + return xarray_reduce( + self._obj, + self._group, + func="sum", + blockwise=True, + expected_groups=(self._unique_coord.data,), + ) + + def mean(self): + ... + + def std(self): + ... + + def nanstd(self): + ... + + def var(self): + ... + + def nanvar(self): + ... + + def max(self): + ... + + def nanmax(self): + ... + + def min(self): + ... + + def nanmin(self): + ... + + def argmin(self): + ... + + def argmax(self): + ... + + def nanargmin(self): + ... + + def nanargmax(self): + ... + + def first(self): + ... + + def last(self): + ... + + else: @classmethod def _reduce_method( From 511dd448de6877f8af0d1356bf16bb20d481c209 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Aug 2021 18:43:13 -0600 Subject: [PATCH 003/138] Add more aggregations --- xarray/core/groupby.py | 264 +++++++++++++++++++++++++++++------------ 1 file changed, 188 insertions(+), 76 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4c5e7ca27ec..68d3f7189b1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -896,59 +896,115 @@ def reduce_array(ar): if XARRAY_NUMPY_GROUPIES: - def sum(self): + def sum(self, *, dim=None, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, func="sum", - blockwise=True, + blockwise=blockwise, + dim=dim, + fill_value=fill_value, expected_groups=(self._unique_coord.data,), ) - def mean(self): - ... - - def std(self): - ... - - def nanstd(self): - ... - - def var(self): - ... - - def nanvar(self): - ... - - def max(self): - ... - - def nanmax(self): - ... + def mean(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="mean", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def min(self): - ... + def std(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="std", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanmin(self): - ... + def var(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="var", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def argmin(self): - ... + def max(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="max", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def argmax(self): - ... + def min(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="min", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanargmin(self): - ... 
+ def argmin(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="argmin", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanargmax(self): - ... + def argmax(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="argmax", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def first(self): - ... + def frist(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="first", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def last(self): - ... + def last(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="last", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) else: @@ -1057,59 +1113,115 @@ def _combine(self, applied): if XARRAY_NUMPY_GROUPIES: - def sum(self): + def sum(self, *, dim=None, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, func="sum", - blockwise=True, + blockwise=blockwise, + dim=dim, + fill_value=fill_value, expected_groups=(self._unique_coord.data,), ) - def mean(self): - ... - - def std(self): - ... - - def nanstd(self): - ... - - def var(self): - ... - - def nanvar(self): - ... - - def max(self): - ... - - def nanmax(self): - ... + def mean(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="mean", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def min(self): - ... + def std(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="std", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanmin(self): - ... + def var(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="var", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def argmin(self): - ... + def max(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="max", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def argmax(self): - ... + def min(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="min", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanargmin(self): - ... + def argmin(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="argmin", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def nanargmax(self): - ... + def argmax(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="argmax", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def first(self): - ... 
+ def frist(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="first", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) - def last(self): - ... + def last(self, *, dim=None, fill_value=None, blockwise=True): + return xarray_reduce( + self._obj, + self._group, + func="last", + blockwise=blockwise, + dim=dim, + fill_value=fill_value, + expected_groups=(self._unique_coord.data,), + ) else: From 3ee620018e7e6a742a4c6d107911196b22282332 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Aug 2021 18:47:32 -0600 Subject: [PATCH 004/138] Remove comments --- xarray/core/groupby.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 68d3f7189b1..e6a53c7665e 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -396,12 +396,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - # to_stack = [] - # for index, slicer in enumerate(group_indices): - # stop = obj.sizes[group_dim] if slicer.stop is None else slicer.stop - # to_stack.append(index * np.ones((stop - slicer.start,), dtype=np.int32)) - # by = DataArray(np.hstack(to_stack), dims=(group_dim,), name="__groupby_dim__") - # specification for the groupby operation self._obj = obj self._group = group From f0883921adf57d5c21ddb17ff9001ba8dba99514 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 12 Aug 2021 18:49:48 -0600 Subject: [PATCH 005/138] Fix position keyword arguments --- xarray/core/groupby.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e6a53c7665e..8cbee54cf41 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -890,7 +890,7 @@ def reduce_array(ar): if XARRAY_NUMPY_GROUPIES: - def sum(self, *, dim=None, fill_value=None, blockwise=True): + def sum(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -901,7 +901,7 @@ def sum(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def mean(self, *, dim=None, fill_value=None, blockwise=True): + def mean(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -912,7 +912,7 @@ def mean(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def std(self, *, dim=None, fill_value=None, blockwise=True): + def std(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -923,7 +923,7 @@ def std(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def var(self, *, dim=None, fill_value=None, blockwise=True): + def var(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -934,7 +934,7 @@ def var(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def max(self, *, dim=None, fill_value=None, blockwise=True): + def max(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -945,7 +945,7 @@ def max(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def min(self, *, dim=None, fill_value=None, blockwise=True): + def min(self, dim=None, *, fill_value=None, 
blockwise=True): return xarray_reduce( self._obj, self._group, @@ -956,7 +956,7 @@ def min(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmin(self, *, dim=None, fill_value=None, blockwise=True): + def argmin(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -967,7 +967,7 @@ def argmin(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmax(self, *, dim=None, fill_value=None, blockwise=True): + def argmax(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -978,7 +978,7 @@ def argmax(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def frist(self, *, dim=None, fill_value=None, blockwise=True): + def frist(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -989,7 +989,7 @@ def frist(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def last(self, *, dim=None, fill_value=None, blockwise=True): + def last(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1107,7 +1107,7 @@ def _combine(self, applied): if XARRAY_NUMPY_GROUPIES: - def sum(self, *, dim=None, fill_value=None, blockwise=True): + def sum(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1118,7 +1118,7 @@ def sum(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def mean(self, *, dim=None, fill_value=None, blockwise=True): + def mean(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1129,7 +1129,7 @@ def mean(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def std(self, *, dim=None, fill_value=None, blockwise=True): + def std(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1140,7 +1140,7 @@ def std(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def var(self, *, dim=None, fill_value=None, blockwise=True): + def var(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1151,7 +1151,7 @@ def var(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def max(self, *, dim=None, fill_value=None, blockwise=True): + def max(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1162,7 +1162,7 @@ def max(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def min(self, *, dim=None, fill_value=None, blockwise=True): + def min(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1173,7 +1173,7 @@ def min(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmin(self, *, dim=None, fill_value=None, blockwise=True): + def argmin(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1184,7 +1184,7 @@ def argmin(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmax(self, *, dim=None, fill_value=None, blockwise=True): + def 
argmax(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1195,7 +1195,7 @@ def argmax(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def frist(self, *, dim=None, fill_value=None, blockwise=True): + def frist(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, @@ -1206,7 +1206,7 @@ def frist(self, *, dim=None, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def last(self, *, dim=None, fill_value=None, blockwise=True): + def last(self, dim=None, *, fill_value=None, blockwise=True): return xarray_reduce( self._obj, self._group, From 35944e434d30fdc9d4094248d0898ae8a249726c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 13 Aug 2021 15:23:09 -0600 Subject: [PATCH 006/138] Remove `_numpy_groupies.py` module --- xarray/core/_numpy_groupies.py | 118 --------------------------------- xarray/core/groupby.py | 42 ++++++------ 2 files changed, 21 insertions(+), 139 deletions(-) delete mode 100644 xarray/core/_numpy_groupies.py diff --git a/xarray/core/_numpy_groupies.py b/xarray/core/_numpy_groupies.py deleted file mode 100644 index 41fb8a44346..00000000000 --- a/xarray/core/_numpy_groupies.py +++ /dev/null @@ -1,118 +0,0 @@ -import itertools -import os -import typing - -import numpy as np - -XARRAY_NUMPY_GROUPIES = os.environ.get("XARRAY_NUMPY_GROUPIES", "False").lower() in ( - "true", - "1", -) - -if XARRAY_NUMPY_GROUPIES: - try: - import dask_groupby.aggregations - import dask_groupby.core - except ImportError: - raise ImportError( - "Using numpy_groupies with xarray requires the `dask-groupby` package " - "to be installed. To install, run `python -m pip install dask_groupby`." - ) - import dask - - def xarray_reduce( - obj, - *by, - func, - expected_groups=None, - bins=None, - dim=None, - split_out=1, - fill_value=None, - blockwise=False, - ): - """Reduce a DataArray or Dataset using dask_groupby.xarray.""" - - from .alignment import broadcast - from .computation import apply_ufunc - from .dataarray import DataArray - - by: typing.Tuple[DataArray] = tuple(obj[g] if isinstance(g, str) else g for g in by) # type: ignore - if len(by) > 1 and any(dask.is_dask_collection(by_) for by_ in by): - raise ValueError( - "Grouping by multiple variables will call compute dask variables." 
- ) - - grouper_dims = set(itertools.chain(*tuple(g.dims for g in by))) - obj, *by = broadcast(obj, *by, exclude=set(obj.dims) - grouper_dims) - obj = obj.transpose(..., *by[0].dims) - - dim = by[0].dims if dim is None else dask_groupby.aggregations._atleast_1d(dim) - assert isinstance(obj, DataArray) - axis = tuple(obj.get_axis_num(d) for d in dim) - group_names = tuple(g.name for g in by) - if len(by) > 1: - ( - group_idx, - expected_groups, - group_shape, - _, - _, - _, - ) = dask_groupby.core.factorize_( - tuple(g.data for g in by), expected_groups, bins - ) - to_group = DataArray( - group_idx, dims=dim, coords={d: by[0][d] for d in by[0].indexes} - ) - else: - if expected_groups is None and isinstance(by[0].data, np.ndarray): - expected_groups = (np.unique(by[0].data),) - if expected_groups is None: - raise NotImplementedError( - "Please provided expected_groups if not grouping by a numpy-backed DataArray" - ) - group_shape = (len(expected_groups[0]),) - to_group = by[0] - - group_sizes = dict(zip(group_names, group_shape)) - indims = tuple(obj.dims) - otherdims = tuple(d for d in indims if d not in dim) - result_dims = otherdims + group_names - - def wrapper(*args, **kwargs): - result, groups = dask_groupby.core.groupby_reduce(*args, **kwargs) - if len(by) > 1: - # all groups need not be present. reindex here - # TODO: add test - reindexed = dask_groupby.core.reindex_( - result, - from_=groups, - to=np.arange(np.prod(group_shape)), - fill_value=fill_value, - axis=-1, - ) - result = reindexed.reshape(result.shape[:-1] + group_shape) - return result - - actual = apply_ufunc( - wrapper, - obj, - to_group, - input_core_dims=[indims, dim], - dask="allowed", - output_core_dims=[result_dims], - dask_gufunc_kwargs=dict(output_sizes=group_sizes), - kwargs={ - "func": func, - "axis": axis, - "split_out": split_out, - "fill_value": fill_value, - "blockwise": blockwise, - }, - ) - - for name, expect in zip(group_names, expected_groups): - actual[name] = expect - - return actual diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8cbee54cf41..ce7f8d448ae 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -30,7 +30,7 @@ ) if XARRAY_NUMPY_GROUPIES: - from ._numpy_groupies import xarray_reduce + from dask_groupby.xarray import xarray_reduce def check_reduce_dims(reduce_dims, dimensions): @@ -890,7 +890,7 @@ def reduce_array(ar): if XARRAY_NUMPY_GROUPIES: - def sum(self, dim=None, *, fill_value=None, blockwise=True): + def sum(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -901,7 +901,7 @@ def sum(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def mean(self, dim=None, *, fill_value=None, blockwise=True): + def mean(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -912,7 +912,7 @@ def mean(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def std(self, dim=None, *, fill_value=None, blockwise=True): + def std(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -923,7 +923,7 @@ def std(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def var(self, dim=None, *, fill_value=None, blockwise=True): + def var(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -934,7 +934,7 @@ def var(self, dim=None, *, 
fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def max(self, dim=None, *, fill_value=None, blockwise=True): + def max(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -945,7 +945,7 @@ def max(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def min(self, dim=None, *, fill_value=None, blockwise=True): + def min(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -956,7 +956,7 @@ def min(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmin(self, dim=None, *, fill_value=None, blockwise=True): + def argmin(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -967,7 +967,7 @@ def argmin(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmax(self, dim=None, *, fill_value=None, blockwise=True): + def argmax(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -978,7 +978,7 @@ def argmax(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def frist(self, dim=None, *, fill_value=None, blockwise=True): + def frist(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -989,7 +989,7 @@ def frist(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def last(self, dim=None, *, fill_value=None, blockwise=True): + def last(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1107,7 +1107,7 @@ def _combine(self, applied): if XARRAY_NUMPY_GROUPIES: - def sum(self, dim=None, *, fill_value=None, blockwise=True): + def sum(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1118,7 +1118,7 @@ def sum(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def mean(self, dim=None, *, fill_value=None, blockwise=True): + def mean(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1129,7 +1129,7 @@ def mean(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def std(self, dim=None, *, fill_value=None, blockwise=True): + def std(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1140,7 +1140,7 @@ def std(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def var(self, dim=None, *, fill_value=None, blockwise=True): + def var(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1151,7 +1151,7 @@ def var(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def max(self, dim=None, *, fill_value=None, blockwise=True): + def max(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1162,7 +1162,7 @@ def max(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def min(self, dim=None, *, fill_value=None, blockwise=True): + def min(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1173,7 
+1173,7 @@ def min(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmin(self, dim=None, *, fill_value=None, blockwise=True): + def argmin(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1184,7 +1184,7 @@ def argmin(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def argmax(self, dim=None, *, fill_value=None, blockwise=True): + def argmax(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1195,7 +1195,7 @@ def argmax(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def frist(self, dim=None, *, fill_value=None, blockwise=True): + def frist(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, @@ -1206,7 +1206,7 @@ def frist(self, dim=None, *, fill_value=None, blockwise=True): expected_groups=(self._unique_coord.data,), ) - def last(self, dim=None, *, fill_value=None, blockwise=True): + def last(self, dim=None, *, fill_value=None, blockwise=False): return xarray_reduce( self._obj, self._group, From e6bcce9033db6da748f4b89057ef165cd6fcb6f7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 24 Aug 2021 12:35:51 -0600 Subject: [PATCH 007/138] some fixes --- xarray/core/groupby.py | 121 +++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ce7f8d448ae..ea10ae446e9 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -890,114 +890,125 @@ def reduce_array(ar): if XARRAY_NUMPY_GROUPIES: - def sum(self, dim=None, *, fill_value=None, blockwise=False): + def count(self, dim=None, *, keep_attrs=True, fill_value=None): + return xarray_reduce( + self._obj, + self._group, + func="count", + blockwise=False, + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + ) + + def sum(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="sum", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def mean(self, dim=None, *, fill_value=None, blockwise=False): + def mean(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="mean", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def std(self, dim=None, *, fill_value=None, blockwise=False): + def std(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="std", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def var(self, dim=None, *, fill_value=None, blockwise=False): + def var(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="var", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def max(self, dim=None, *, fill_value=None, blockwise=False): + def max(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="max", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - 
expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def min(self, dim=None, *, fill_value=None, blockwise=False): + def min(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="min", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def argmin(self, dim=None, *, fill_value=None, blockwise=False): + def argmin(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="argmin", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def argmax(self, dim=None, *, fill_value=None, blockwise=False): + def argmax(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="argmax", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def frist(self, dim=None, *, fill_value=None, blockwise=False): + def first(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="first", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) - def last(self, dim=None, *, fill_value=None, blockwise=False): + def last(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="last", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), + keep_attrs=keep_attrs, ) else: @@ -1107,114 +1118,104 @@ def _combine(self, applied): if XARRAY_NUMPY_GROUPIES: - def sum(self, dim=None, *, fill_value=None, blockwise=False): + def sum(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="sum", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def mean(self, dim=None, *, fill_value=None, blockwise=False): + def mean(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="mean", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def std(self, dim=None, *, fill_value=None, blockwise=False): + def std(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="std", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def var(self, dim=None, *, fill_value=None, blockwise=False): + def var(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="var", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def max(self, dim=None, *, fill_value=None, blockwise=False): + def max(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="max", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def min(self, dim=None, *, fill_value=None, blockwise=False): + def min(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, 
self._group, func="min", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def argmin(self, dim=None, *, fill_value=None, blockwise=False): + def argmin(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="argmin", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def argmax(self, dim=None, *, fill_value=None, blockwise=False): + def argmax(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="argmax", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def frist(self, dim=None, *, fill_value=None, blockwise=False): + def first(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="first", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) - def last(self, dim=None, *, fill_value=None, blockwise=False): + def last(self, dim=None, *, keep_attrs=True, fill_value=None): return xarray_reduce( self._obj, self._group, func="last", - blockwise=blockwise, + blockwise=False, dim=dim, fill_value=fill_value, - expected_groups=(self._unique_coord.data,), ) else: From cdf7612b6deac9f4b1b793d2f0014507c6e289f9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Oct 2021 12:50:27 +0530 Subject: [PATCH 008/138] Fix resample test --- xarray/core/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ea10ae446e9..4e8aacf39c1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1227,12 +1227,12 @@ def _reduce_method( if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce(func, dim=dim, skipna=skipna, **kwargs) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func, dim=dim, **kwargs) return wrapped_func From af4cc5d44ff95e0a4aace6e867de26b1f94e63fd Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Oct 2021 16:05:31 +0530 Subject: [PATCH 009/138] Fix reduce methods --- xarray/core/groupby.py | 398 ++++++++++------------------------------- 1 file changed, 96 insertions(+), 302 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4e8aacf39c1..e92c71be8fd 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -533,6 +533,100 @@ def _maybe_unstack(self, obj): obj._indexes = propagate_indexes(obj._indexes, exclude=self._inserted_dims) return obj + @classmethod + def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): + if XARRAY_NUMPY_GROUPIES: + + def wrapped_func( + self, + dim=None, + axis=None, + skipna=True, + fill_value=None, + keep_attrs=True, + min_count=None, + **kwargs, + ): # type: ignore[misc] + + # TODO: only do this for resample, not general groupers... 
+ # this creates a label DataArray since resample doesn't do that somehow + if isinstance(self._group_indices[0], slice): + from .dataarray import DataArray + + tostack = [] + for idx, slicer in zip( + self._unique_coord.data, self._group_indices + ): + if slicer.stop is None: + stop = self._obj.sizes[self._group_dim] + else: + stop = slicer.stop + tostack.append(np.full((stop - slicer.start,), fill_value=idx)) + group = DataArray( + np.hstack(tostack), + dims=(self._group_dim,), + name=self._unique_coord.name, + ) + else: + group = self._group + + # TODO: avoid stacking by default + if self._stacked_dim is not None: + obj = self._obj.unstack(self._stacked_dim) + group = group.unstack(self._stacked_dim) + else: + obj = self._obj + + result = xarray_reduce( + obj, + group, + func=func.__name__, + method=self._dask_groupby_kwargs, + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + expected_groups=(self._unique_coord.values,), + skipna=skipna, + min_count=min_count, + ) + result = self._maybe_restore_empty_groups(result) + # TODO: make this cleaner; the renaming happens in DatasetResample.map + if self._unique_coord.name == "__resample_dim__": + result = result.rename({"__resample_dim__": self._group_dim}) + return result + + else: + if include_skipna: + + def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): # type: ignore[misc] + return self.reduce(func, dim=dim, skipna=skipna, **kwargs) + + else: + + def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] + return self.reduce(func, dim=dim, **kwargs) + + return wrapped_func + + _reduce_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension(s) over which to apply `{name}`. + axis : int or sequence of int, optional + Axis(es) over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + `{name}` is calculated over axes.""" + ) + + _cum_extra_args_docstring = dedent( + """\ + dim : str or sequence of str, optional + Dimension over which to apply `{name}`. + axis : int or sequence of int, optional + Axis over which to apply `{name}`. Only one of the 'dim' + and 'axis' arguments can be supplied.""" + ) + def fillna(self, value): """Fill missing values in this object by group. 
@@ -888,166 +982,6 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) - if XARRAY_NUMPY_GROUPIES: - - def count(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="count", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def sum(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="sum", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def mean(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="mean", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def std(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="std", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def var(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="var", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def max(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="max", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def min(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="min", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def argmin(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="argmin", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def argmax(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="argmax", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def first(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="first", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - def last(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="last", - blockwise=False, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - ) - - else: - - @classmethod - def _reduce_method( - cls, func: Callable, include_skipna: bool, numeric_only: bool - ): - if include_skipna: - - def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) - - else: - - def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) - - return wrapped_func - - _reduce_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension(s) over which to apply `{name}`. - axis : int or sequence of int, optional - Axis(es) over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `{name}` is calculated over axes.""" - ) - - _cum_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension over which to apply `{name}`. - axis : int or sequence of int, optional - Axis over which to apply `{name}`. 
Only one of the 'dim' - and 'axis' arguments can be supplied.""" - ) - class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic): @@ -1116,145 +1050,6 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - if XARRAY_NUMPY_GROUPIES: - - def sum(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="sum", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def mean(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="mean", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def std(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="std", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def var(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="var", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def max(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="max", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def min(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="min", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def argmin(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="argmin", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def argmax(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="argmax", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def first(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="first", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - def last(self, dim=None, *, keep_attrs=True, fill_value=None): - return xarray_reduce( - self._obj, - self._group, - func="last", - blockwise=False, - dim=dim, - fill_value=fill_value, - ) - - else: - - @classmethod - def _reduce_method( - cls, func: Callable, include_skipna: bool, numeric_only: bool - ): - if include_skipna: - - def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim=dim, skipna=skipna, **kwargs) - - else: - - def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim=dim, **kwargs) - - return wrapped_func - - _reduce_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension(s) over which to apply `{name}`. - axis : int or sequence of int, optional - Axis(es) over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `{name}` is calculated over axes.""" - ) - - _cum_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension over which to apply `{name}`. - axis : int or sequence of int, optional - Axis over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied.""" - ) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): """Reduce the items in this group by applying `func` along some dimension(s). 
@@ -1307,6 +1102,5 @@ def assign(self, **kwargs): return self.map(lambda ds: ds.assign(**kwargs)) -if not XARRAY_NUMPY_GROUPIES: - ops.inject_reduce_methods(DataArrayGroupBy) - ops.inject_reduce_methods(DatasetGroupBy) +ops.inject_reduce_methods(DataArrayGroupBy) +ops.inject_reduce_methods(DatasetGroupBy) From 58c1c6b7f6158af51553249389f6da1651c97b08 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Oct 2021 16:06:57 +0530 Subject: [PATCH 010/138] Add _dask_groupby_kwargs --- xarray/core/groupby.py | 2 ++ xarray/core/resample.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index e92c71be8fd..43e15982002 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -269,6 +269,7 @@ class GroupBy: "_stacked_dim", "_unique_coord", "_dims", + "_dask_groupby_kwargs", ) def __init__( @@ -406,6 +407,7 @@ def __init__( self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims + self._dask_groupby_kwargs = {} # self._by = by # cached attributes diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c7749a7e5ca..e966c62b3cd 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -174,6 +174,8 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) + self._dask_groupby_kwargs = dict(method="blockwise") + def map(self, func, shortcut=False, args=(), **kwargs): """Apply a function to each array in the group and concatenate them together into a new array. @@ -262,6 +264,7 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): self._resample_dim = resample_dim super().__init__(*args, **kwargs) + self._dask_groupby_kwargs = dict(method="blockwise") def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for From 69fd5638c45fbfe374aba6494586bd8b48aee652 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Oct 2021 17:30:10 +0530 Subject: [PATCH 011/138] Avoid forwarding DummyGroup objects --- xarray/core/groupby.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 43e15982002..4f81f7ce25c 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -570,7 +570,10 @@ def wrapped_func( name=self._unique_coord.name, ) else: - group = self._group + if isinstance(self._group, _DummyGroup): + group = self._group.name + else: + group = self._group # TODO: avoid stacking by default if self._stacked_dim is not None: From b1e3ab25e85ad3df302209c22639e6af11b1d443 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 3 Oct 2021 17:37:07 +0530 Subject: [PATCH 012/138] Raise error when reducing along indexed dimensions with squeeze=True --- xarray/core/groupby.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4f81f7ce25c..9e68f951aaa 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -270,6 +270,7 @@ class GroupBy: "_unique_coord", "_dims", "_dask_groupby_kwargs", + "_squeeze", ) def __init__( @@ -408,6 +409,7 @@ def __init__( self._full_index = full_index self._restore_coord_dims = restore_coord_dims self._dask_groupby_kwargs = {} + self._squeeze = squeeze # self._by = by # cached attributes @@ -550,6 +552,18 @@ def wrapped_func( **kwargs, ): # type: ignore[misc] + # weird backcompat + # reducing along a unique indexed dimension with squeeze=True + # should raise an error + if ( + dim is 
None or dim == self._group.name + ) and self._group.name in self._obj.indexes: + index = self._obj.indexes[self._group.name] + if index.is_unique and self._squeeze: + raise ValueError( + f"cannot reduce over dimensions {self._group.name!r}" + ) + # TODO: only do this for resample, not general groupers... # this creates a label DataArray since resample doesn't do that somehow if isinstance(self._group_indices[0], slice): From 462e61b79f6ecfef24275622e84378673f9e9950 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 4 Oct 2021 10:16:15 +0530 Subject: [PATCH 013/138] Don't pass numeric_only to DataArray.reduce Tests pass! --- xarray/core/groupby.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9e68f951aaa..bed3cb68d01 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -618,12 +618,32 @@ def wrapped_func( if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim=dim, skipna=skipna, **kwargs) + # DataArray.reduce not deal with numeric_only + from .dataarray import DataArray + + if isinstance(self._obj, DataArray): + add_kwargs = {} + else: + add_kwargs = {"numeric_only": numeric_only} + return self.reduce( + func, + dim=dim, + skipna=skipna, + **add_kwargs, + **kwargs, + ) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim=dim, **kwargs) + from .dataarray import DataArray + + # DataArray.reduce not deal with numeric_only + if isinstance(self._obj, DataArray): + add_kwargs = {} + else: + add_kwargs = {"numeric_only": numeric_only} + return self.reduce(func, dim=dim, **add_kwargs, **kwargs) return wrapped_func From 1d9a36053181aeedbc90c3eec5a3457327630771 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 4 Oct 2021 10:21:33 +0530 Subject: [PATCH 014/138] Add CI for now --- .github/workflows/ci.yaml | 1 + ci/requirements/environment.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e774803dda7..725e046cc87 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,6 +53,7 @@ jobs: fi echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV + echo "XARRAY_NUMPY_GROUPIES=1" >> $GITHUB_ENV - name: Cache conda uses: actions/cache@v2 diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index f64ca3677cc..c84dcc73665 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -46,3 +46,5 @@ dependencies: - zarr - pip: - numbagg + - numpy_groupies + - git+https://github.com:dcherian/dask_groupby.git From f4748ee80fc993f9fd9a8b4dd76d242c778b64ae Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 4 Oct 2021 10:28:08 +0530 Subject: [PATCH 015/138] typo --- ci/requirements/environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index c84dcc73665..762a37ec26b 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -47,4 +47,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - git+https://github.com:dcherian/dask_groupby.git + - git+https://github.com/dcherian/dask_groupby.git From b97ffcb37b416e93c5481d6ce363f74e61e88dbc Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 4 Oct 2021 10:44:04 +0530 Subject: [PATCH 016/138] Fix windows env --- ci/requirements/environment-windows.yml | 2 
++ 1 file changed, 2 insertions(+) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 78ead40d5a2..7fa65b5c58f 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -42,3 +42,5 @@ dependencies: - zarr - pip: - numbagg + - dask_groupby + - git+https://github.com/dcherian/dask_groupby.git From 9b44db9e6d3db00ab8301fe426768925e8a2ff2f Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 4 Oct 2021 10:45:39 +0530 Subject: [PATCH 017/138] Fix keep_attrs test --- xarray/core/groupby.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index bed3cb68d01..c375709f4ec 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -547,11 +547,14 @@ def wrapped_func( axis=None, skipna=True, fill_value=None, - keep_attrs=True, + keep_attrs=None, min_count=None, **kwargs, ): # type: ignore[misc] + if keep_attrs is None: + keep_attrs = _get_keep_attrs(True) + # weird backcompat # reducing along a unique indexed dimension with squeeze=True # should raise an error From 262a3f5dd3a22f82a719d61ac7a95758fa1d2e9b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 4 Oct 2021 11:10:44 +0530 Subject: [PATCH 018/138] Update ci/requirements/environment-windows.yml --- ci/requirements/environment-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 7fa65b5c58f..e1d08dc08a5 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -42,5 +42,5 @@ dependencies: - zarr - pip: - numbagg - - dask_groupby + - numpy_groupies - git+https://github.com/dcherian/dask_groupby.git From e3b3a00c6a1e1e8cc8e20c1f2a1051efaafe9297 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 5 Oct 2021 14:48:25 +0530 Subject: [PATCH 019/138] Fix resampling --- xarray/core/groupby.py | 3 ++- xarray/tests/test_groupby.py | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index c375709f4ec..696946bab4b 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -603,13 +603,14 @@ def wrapped_func( obj, group, func=func.__name__, - method=self._dask_groupby_kwargs, dim=dim, fill_value=fill_value, keep_attrs=keep_attrs, expected_groups=(self._unique_coord.values,), skipna=skipna, min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, ) result = self._maybe_restore_empty_groups(result) # TODO: make this cleaner; the renaming happens in DatasetResample.map diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index d48726e8304..4d2d1e8c98b 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1159,10 +1159,12 @@ def test_groupby_bins(self): expected = DataArray( [1, 5], dims="dim_0_bins", coords={"dim_0_bins": bin_coords} ) - # the problem with this is that it overwrites the dimensions of array! 
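# A small sketch of the behaviour the rewritten test below checks, reusing the
# ``array`` and ``bins`` names from the surrounding test (illustrative only):
#
#     binned = array.groupby_bins("dim_0", bins=bins).sum()
#     assert "dim_0_bins" in binned.dims     # reduction lands on the bin labels
#     assert array.sizes["dim_0"] == 4       # the input's dimensions are untouched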
- # actual = array.groupby('dim_0', bins=bins).sum() - actual = array.groupby_bins("dim_0", bins).map(lambda x: x.sum()) + actual = array.groupby_bins("dim_0", bins=bins).sum() assert_identical(expected, actual) + + actual = array.groupby_bins("dim_0", bins=bins).map(lambda x: x.sum()) + assert_identical(expected, actual) + # make sure original array dims are unchanged assert len(array.dim_0) == 4 From faee02c7cb95dcc97a71e2fb0b2627c0ca49f2ed Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 12:37:36 -0600 Subject: [PATCH 020/138] fix env stuff + remove env var --- .github/workflows/ci.yaml | 1 - ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 725e046cc87..e774803dda7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -53,7 +53,6 @@ jobs: fi echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - echo "XARRAY_NUMPY_GROUPIES=1" >> $GITHUB_ENV - name: Cache conda uses: actions/cache@v2 diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index e42c01c6106..1f4302c0d4b 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -44,4 +44,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - git+https://github.com/dcherian/dask_groupby.git + - dask_groupby diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 5892f2bf108..ee25a48e5fc 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -48,4 +48,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - git+https://github.com/dcherian/dask_groupby.git + - dask_groupby From 3608e9fa135ac93231cbf4f8bff007887a10e19d Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 15:26:28 -0600 Subject: [PATCH 021/138] get working again --- setup.cfg | 2 + xarray/core/_reductions.py | 1257 +++++++++++++++++++--------- xarray/core/groupby.py | 189 ++--- xarray/core/options.py | 6 + xarray/util/generate_reductions.py | 32 +- 5 files changed, 932 insertions(+), 554 deletions(-) diff --git a/setup.cfg b/setup.cfg index bd123262cf7..f380892b204 100644 --- a/setup.cfg +++ b/setup.cfg @@ -100,6 +100,8 @@ accel = scipy bottleneck numbagg + numpy_groupies + dask_groupby parallel = dask[complete] diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 67fbbd482d0..de8cb20317e 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -5,6 +5,7 @@ from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops +from .options import OPTIONS from .types import T_DataArray, T_Dataset if sys.version_info >= (3, 8): @@ -33,6 +34,7 @@ def count( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -93,18 +95,29 @@ def count( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="count", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -151,12 +164,6 @@ def all( da (time) bool True True True True True False >>> ds.groupby("labels").all() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool False True True See Also -------- @@ -165,18 +172,29 @@ def all( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="all", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -223,12 +241,6 @@ def any( da (time) bool True True True True True False >>> ds.groupby("labels").any() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool True True True See Also -------- @@ -237,19 +249,30 @@ def any( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="any", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -325,20 +348,32 @@ def max( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="max", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -414,20 +449,32 @@ def min( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="min", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -503,14 +550,25 @@ def mean( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="mean", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DatasetReduce, @@ -518,6 +576,7 @@ def prod( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -609,15 +668,27 @@ def prod( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="prod", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DatasetReduce, @@ -625,6 +696,7 @@ def sum( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -716,21 +788,34 @@ def sum( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="sum", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -806,20 +891,32 @@ def std( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="std", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -895,20 +992,32 @@ def var( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -960,22 +1069,10 @@ def median( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 See Also -------- @@ -984,14 +1081,25 @@ def median( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="median", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) class DatasetResampleReductions: @@ -1001,6 +1109,7 @@ def count( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1061,18 +1170,29 @@ def count( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="count", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1119,12 +1239,6 @@ def all( da (time) bool True True True True True False >>> ds.resample(time="3M").all() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True False See Also -------- @@ -1133,18 +1247,29 @@ def all( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="all", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1191,12 +1316,6 @@ def any( da (time) bool True True True True True False >>> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True See Also -------- @@ -1205,19 +1324,30 @@ def any( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="any", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1293,20 +1423,32 @@ def max( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="max", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1382,20 +1524,32 @@ def min( :ref:`resampling` User guide on resampling operations. 
""" - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="min", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1471,14 +1625,25 @@ def mean( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="mean", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DatasetReduce, @@ -1486,6 +1651,7 @@ def prod( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1577,15 +1743,27 @@ def prod( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="prod", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DatasetReduce, @@ -1593,6 +1771,7 @@ def sum( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1684,21 +1863,34 @@ def sum( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="sum", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1774,20 +1966,32 @@ def std( :ref:`resampling` User guide on resampling operations. 
""" - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="std", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1863,20 +2067,32 @@ def var( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1928,22 +2144,10 @@ def median( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").median() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan See Also -------- @@ -1952,14 +2156,25 @@ def median( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="median", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) class DataArrayReduce(Protocol): @@ -1982,6 +2197,7 @@ def count( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2037,17 +2253,28 @@ def count( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="count", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2091,10 +2318,6 @@ def all( labels (time) >> da.groupby("labels").all() - - array([False, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' See Also -------- @@ -2103,17 +2326,28 @@ def all( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="all", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2157,10 +2391,6 @@ def any( labels (time) >> da.groupby("labels").any() - - array([ True, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' See Also -------- @@ -2169,18 +2399,29 @@ def any( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="any", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2249,19 +2490,31 @@ def max( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="max", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2330,19 +2583,31 @@ def min( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="min", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2411,13 +2676,24 @@ def mean( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="mean", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DataArrayReduce, @@ -2425,6 +2701,7 @@ def prod( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2507,14 +2784,26 @@ def prod( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="prod", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DataArrayReduce, @@ -2522,6 +2811,7 @@ def sum( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2604,20 +2894,33 @@ def sum( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="sum", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2686,19 +2989,31 @@ def std( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="std", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2767,19 +3082,31 @@ def var( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2828,18 +3155,10 @@ def median( labels (time) >> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' See Also -------- @@ -2848,13 +3167,24 @@ def median( :ref:`groupby` User guide on groupby operations. """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="median", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) class DataArrayResampleReductions: @@ -2864,6 +3194,7 @@ def count( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2919,17 +3250,28 @@ def count( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="count", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2973,10 +3315,6 @@ def all( labels (time) >> da.resample(time="3M").all() - - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 See Also -------- @@ -2985,17 +3323,28 @@ def all( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="all", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3039,10 +3388,6 @@ def any( labels (time) >> da.resample(time="3M").any() - - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 See Also -------- @@ -3051,18 +3396,29 @@ def any( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="any", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3131,19 +3487,31 @@ def max( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="max", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3212,19 +3580,31 @@ def min( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="min", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3293,13 +3673,24 @@ def mean( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="mean", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DataArrayReduce, @@ -3307,6 +3698,7 @@ def prod( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3389,14 +3781,26 @@ def prod( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="prod", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DataArrayReduce, @@ -3404,6 +3808,7 @@ def sum( skipna: bool = True, min_count: Optional[int] = None, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3486,20 +3891,33 @@ def sum( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="sum", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + min_count=min_count, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3568,19 +3986,31 @@ def std( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="std", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3649,19 +4079,31 @@ def var( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3710,18 +4152,10 @@ def median( labels (time) >> da.resample(time="3M").median() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 See Also -------- @@ -3730,10 +4164,21 @@ def median( :ref:`resampling` User guide on resampling operations. """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="median", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 86c912e304f..24656104138 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,8 +1,5 @@ import datetime -import os import warnings -from textwrap import dedent -from typing import Callable import numpy as np import pandas as pd @@ -25,14 +22,6 @@ ) from .variable import IndexVariable, Variable, as_variable -XARRAY_NUMPY_GROUPIES = os.environ.get("XARRAY_NUMPY_GROUPIES", "False").lower() in ( - "true", - "1", -) - -if XARRAY_NUMPY_GROUPIES: - from dask_groupby.xarray import xarray_reduce - def check_reduce_dims(reduce_dims, dimensions): @@ -538,138 +527,62 @@ def _maybe_unstack(self, obj): obj._indexes = propagate_indexes(obj._indexes, exclude=self._inserted_dims) return obj - @classmethod - def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool): - if XARRAY_NUMPY_GROUPIES: - - def wrapped_func( - self, - dim=None, - axis=None, - skipna=True, - fill_value=None, - keep_attrs=None, - min_count=None, - **kwargs, - ): # type: ignore[misc] - - if keep_attrs is None: - keep_attrs = _get_keep_attrs(True) - - # weird backcompat - # reducing along a unique indexed dimension with squeeze=True - # should raise an error - if ( - dim is None or dim == self._group.name - ) and self._group.name in self._obj.indexes: - index = self._obj.indexes[self._group.name] - if index.is_unique and self._squeeze: - raise ValueError( - f"cannot reduce over dimensions {self._group.name!r}" - ) - - # TODO: only do this for resample, not general groupers... 
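# Sketch of the slice-to-label expansion used for resampling in this method,
# with hypothetical sizes (two groups of three elements along the resampled dim):
#
#     _group_indices == [slice(0, 3), slice(3, 6)]
#     _unique_coord.data == ["2001-01-31", "2001-04-30"]
#
# np.full(...) repeats each label for the length of its slice and np.hstack(...)
# joins them, giving one label per element:
#
#     ["2001-01-31", "2001-01-31", "2001-01-31",
#      "2001-04-30", "2001-04-30", "2001-04-30"]
#
# which is wrapped in a DataArray along self._group_dim before being handed to
# xarray_reduce as the grouping variable.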
- # this creates a label DataArray since resample doesn't do that somehow - if isinstance(self._group_indices[0], slice): - from .dataarray import DataArray - - tostack = [] - for idx, slicer in zip( - self._unique_coord.data, self._group_indices - ): - if slicer.stop is None: - stop = self._obj.sizes[self._group_dim] - else: - stop = slicer.stop - tostack.append(np.full((stop - slicer.start,), fill_value=idx)) - group = DataArray( - np.hstack(tostack), - dims=(self._group_dim,), - name=self._unique_coord.name, - ) - else: - if isinstance(self._group, _DummyGroup): - group = self._group.name - else: - group = self._group - - # TODO: avoid stacking by default - if self._stacked_dim is not None: - obj = self._obj.unstack(self._stacked_dim) - group = group.unstack(self._stacked_dim) - else: - obj = self._obj - - result = xarray_reduce( - obj, - group, - func=func.__name__, - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - expected_groups=(self._unique_coord.values,), - skipna=skipna, - min_count=min_count, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - result = self._maybe_restore_empty_groups(result) - # TODO: make this cleaner; the renaming happens in DatasetResample.map - if self._unique_coord.name == "__resample_dim__": - result = result.rename({"__resample_dim__": self._group_dim}) - return result + def _dask_groupby_reduce(self, dim, **kwargs): + from dask_groupby.xarray import xarray_reduce + # weird backcompat + # reducing along a unique indexed dimension with squeeze=True + # should raise an error + if ( + dim is None or dim == self._group.name + ) and self._group.name in self._obj.xindexes: + index = self._obj.indexes[self._group.name] + if index.is_unique and self._squeeze: + raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") + + # TODO: only do this for resample, not general groupers... + # this creates a label DataArray since resample doesn't do that somehow + if isinstance(self._group_indices[0], slice): + from .dataarray import DataArray + + tostack = [] + for idx, slicer in zip(self._unique_coord.data, self._group_indices): + if slicer.stop is None: + stop = self._obj.sizes[self._group_dim] + else: + stop = slicer.stop + tostack.append(np.full((stop - slicer.start,), fill_value=idx)) + group = DataArray( + np.hstack(tostack), + dims=(self._group_dim,), + name=self._unique_coord.name, + ) else: - if include_skipna: - - def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): # type: ignore[misc] - # DataArray.reduce not deal with numeric_only - from .dataarray import DataArray - - if isinstance(self._obj, DataArray): - add_kwargs = {} - else: - add_kwargs = {"numeric_only": numeric_only} - return self.reduce( - func, - dim=dim, - skipna=skipna, - **add_kwargs, - **kwargs, - ) - + if isinstance(self._group, _DummyGroup): + group = self._group.name else: + group = self._group - def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - from .dataarray import DataArray - - # DataArray.reduce not deal with numeric_only - if isinstance(self._obj, DataArray): - add_kwargs = {} - else: - add_kwargs = {"numeric_only": numeric_only} - return self.reduce(func, dim=dim, **add_kwargs, **kwargs) - - return wrapped_func - - _reduce_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension(s) over which to apply `{name}`. - axis : int or sequence of int, optional - Axis(es) over which to apply `{name}`. 
Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `{name}` is calculated over axes.""" - ) + # TODO: avoid stacking by default + if self._stacked_dim is not None: + obj = self._obj.unstack(self._stacked_dim) + group = group.unstack(self._stacked_dim) + else: + obj = self._obj - _cum_extra_args_docstring = dedent( - """\ - dim : str or sequence of str, optional - Dimension over which to apply `{name}`. - axis : int or sequence of int, optional - Axis over which to apply `{name}`. Only one of the 'dim' - and 'axis' arguments can be supplied.""" - ) + result = xarray_reduce( + obj, + group, + dim=dim, + expected_groups=(self._unique_coord.values,), + **kwargs, + ) + + result = self._maybe_restore_empty_groups(result) + # TODO: make this cleaner; the renaming happens in DatasetResample.map + if self._unique_coord.name == "__resample_dim__": + result = result.rename(dict(__resample_dim__=self._group_dim)) + return result def fillna(self, value): """Fill missing values in this object by group. diff --git a/xarray/core/options.py b/xarray/core/options.py index 90018c51807..df640d16025 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -35,6 +35,7 @@ class T_Options(TypedDict): keep_attrs: Literal["default", True, False] warn_for_unclosed_files: bool use_bottleneck: bool + use_numpy_groupies: bool OPTIONS: T_Options = { @@ -52,6 +53,7 @@ class T_Options(TypedDict): "file_cache_maxsize": 128, "keep_attrs": "default", "use_bottleneck": True, + "use_numpy_groupies": True, "warn_for_unclosed_files": False, } @@ -76,6 +78,7 @@ def _positive_integer(value): "file_cache_maxsize": _positive_integer, "keep_attrs": lambda choice: choice in [True, False, "default"], "use_bottleneck": lambda value: isinstance(value, bool), + "use_numpy_groupies": lambda value: isinstance(value, bool), "warn_for_unclosed_files": lambda value: isinstance(value, bool), } @@ -183,6 +186,9 @@ class set_options: use_bottleneck : bool, default: True Whether to use ``bottleneck`` to accelerate 1D reductions and 1D rolling reduction operations. + use_numpy_groupies : bool, default: True + Whether to use ``numpy_groupies`` and ``dask_groupby`` to + accelerate groupby and resampling reductions. warn_for_unclosed_files : bool, default: False Whether or not to issue a warning when unclosed files are deallocated. This is mostly useful for debugging. diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 72449195d1e..cd5665fb7c5 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -26,6 +26,7 @@ from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops +from .options import OPTIONS from .types import T_DataArray, T_Dataset if sys.version_info >= (3, 8): @@ -98,6 +99,7 @@ def {method}( self: {obj}Reduce, dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} keep_attrs: bool = None, + fill_value=None, **kwargs, ) -> T_{obj}: """ @@ -132,12 +134,22 @@ def {method}( :ref:`{docref}` User guide on {docref} operations. """ - return self.reduce( - duck_array_ops.{array_method}, - dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} - keep_attrs=keep_attrs, - **kwargs, - )''' + if OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="{method}", + dim=dim, + fill_value=fill_value, + keep_attrs=keep_attrs,{skip_na.call}{min_count.call} + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.{array_method}, + dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} + keep_attrs=keep_attrs, + **kwargs, + )''' def generate_groupby_example(obj: str, cls: str, method: str): @@ -211,9 +223,9 @@ def generate_method( if obj == "Dataset": if method in NUMERIC_ONLY_METHODS: - numeric_only_call = "\n numeric_only=True," + numeric_only_call = "\n numeric_only=True," else: - numeric_only_call = "\n numeric_only=False," + numeric_only_call = "\n numeric_only=False," else: numeric_only_call = "" @@ -222,7 +234,7 @@ def generate_method( skip_na = kwarg( docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), kwarg="\n skipna: bool = True,", - call="\n skipna=skipna,", + call="\n skipna=skipna,", ) else: skip_na = kwarg(docs="", kwarg="", call="") @@ -231,7 +243,7 @@ def generate_method( min_count = kwarg( docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), kwarg="\n min_count: Optional[int] = None,", - call="\n min_count=min_count,", + call="\n min_count=min_count,", ) else: min_count = kwarg(docs="", kwarg="", call="") From ad25f78be0250de7cc574c7941b2149015dde26c Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 19:52:36 -0600 Subject: [PATCH 022/138] Add to asv env --- asv_bench/asv.conf.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 83a2aa9f010..9eb81b2c166 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -65,6 +65,8 @@ "bottleneck": ["", null], "dask": [""], "distributed": [""], + "dask_groupby": [""], + "numpy_groupies": [""], }, From 932b9a5d668278019bbd75ea23582ff28a463b91 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 21:27:23 -0600 Subject: [PATCH 023/138] Separate out median --- xarray/core/groupby.py | 172 +++++++++++++++++++++++++++++++++++++++ xarray/core/resample.py | 173 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 345 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 24656104138..894127038f5 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Hashable, Sequence, Union import numpy as np import pandas as pd @@ -12,6 +13,7 @@ from .indexes import propagate_indexes from .options import _get_keep_attrs from .pycompat import integer_types +from .types import T_DataArray, T_Dataset from .utils import ( either_dict_or_kwargs, hashable, @@ -943,6 +945,87 @@ def reduce_array(ar): class DataArrayGroupBy(DataArrayGroupByBase, DataArrayGroupByReductions): __slots__ = () + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + numpy.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + class DatasetGroupByBase(GroupBy, DatasetGroupbyArithmetic): @@ -1065,3 +1148,92 @@ def assign(self, **kwargs): class DatasetGroupBy(DatasetGroupByBase, DatasetGroupByReductions): __slots__ = () + + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 + + See Also + -------- + numpy.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 4e1579ca109..8ab2f499e87 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,7 +1,10 @@ import warnings +from typing import Hashable, Sequence, Union +from . import duck_array_ops from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase +from .types import T_DataArray, T_Dataset RESAMPLE_DIM = "__resample_dim__" @@ -177,6 +180,87 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): self._dask_groupby_kwargs = dict(method="blockwise") + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + numpy.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + def map(self, func, shortcut=False, args=(), **kwargs): """Apply a function to each array in the group and concatenate them together into a new array. 
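Both hand-written ``median`` methods in this patch keep the plain ``reduce`` fallback and never branch on the ``use_numpy_groupies`` option, unlike the generated reductions elsewhere in this series. A minimal sketch of that shape as a standalone helper (hypothetical name ``grouped_median``; the real code lives on the GroupBy/Resample classes, and the Dataset variants additionally pass ``numeric_only=True``):

    from xarray.core import duck_array_ops

    def grouped_median(grouped, dim=None, skipna=True, keep_attrs=None, **kwargs):
        # Eager per-group reduction only: these hand-written methods have no
        # _dask_groupby_reduce fast path and do not consult use_numpy_groupies.
        return grouped.reduce(
            duck_array_ops.median,
            dim=dim,
            skipna=skipna,
            keep_attrs=keep_attrs,
            **kwargs,
        )
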
@@ -267,6 +351,95 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) self._dask_groupby_kwargs = dict(method="blockwise") + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = True, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan + + See Also + -------- + numpy.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for resampling and concatenate them together into a new Dataset. From ac85e72607594daf014e6afa50966289d5c1cc7f Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 21:31:22 -0600 Subject: [PATCH 024/138] make dask_groupby actually optional --- xarray/core/_reductions.py | 474 ++++++----------------------- xarray/util/generate_reductions.py | 12 +- 2 files changed, 95 insertions(+), 391 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index de8cb20317e..2f61ff04b8e 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -14,6 +14,12 @@ from typing_extensions import Protocol +try: + import dask_groupby +except ImportError: + dask_groupby = None + + class DatasetReduce(Protocol): def reduce( self, @@ -95,7 +101,8 @@ def count( :ref:`groupby` User guide on groupby operations. 
""" - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, @@ -172,7 +179,8 @@ def all( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, @@ -249,7 +257,8 @@ def any( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, @@ -348,7 +357,8 @@ def max( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, @@ -449,7 +459,8 @@ def min( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, @@ -550,7 +561,8 @@ def mean( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, @@ -668,7 +680,8 @@ def prod( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, @@ -788,7 +801,8 @@ def sum( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, @@ -891,7 +905,8 @@ def std( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, @@ -992,7 +1007,8 @@ def var( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="var", dim=dim, @@ -1012,95 +1028,6 @@ def var( **kwargs, ) - def median( - self: DatasetReduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - fill_value=None, - **kwargs, - ) -> T_Dataset: - """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. 
- - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").median(skipna=False) - - See Also - -------- - numpy.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. - """ - if OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - class DatasetResampleReductions: __slots__ = () @@ -1170,7 +1097,8 @@ def count( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, @@ -1247,7 +1175,8 @@ def all( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, @@ -1324,7 +1253,8 @@ def any( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, @@ -1423,7 +1353,8 @@ def max( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, @@ -1524,7 +1455,8 @@ def min( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, @@ -1625,7 +1557,8 @@ def mean( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, @@ -1743,7 +1676,8 @@ def prod( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, @@ -1863,7 +1797,8 @@ def sum( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, @@ -1966,7 +1901,8 @@ def std( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, @@ -2067,7 +2003,8 @@ def var( :ref:`resampling` User guide on resampling operations. 
""" - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="var", dim=dim, @@ -2087,95 +2024,6 @@ def var( **kwargs, ) - def median( - self: DatasetReduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - fill_value=None, - **kwargs, - ) -> T_Dataset: - """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.resample(time="3M").median(skipna=False) - - See Also - -------- - numpy.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. - """ - if OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - class DataArrayReduce(Protocol): def reduce( @@ -2253,7 +2101,8 @@ def count( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, @@ -2326,7 +2175,8 @@ def all( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, @@ -2399,7 +2249,8 @@ def any( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, @@ -2490,7 +2341,8 @@ def max( :ref:`groupby` User guide on groupby operations. 
""" - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, @@ -2583,7 +2435,8 @@ def min( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, @@ -2676,7 +2529,8 @@ def mean( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, @@ -2784,7 +2638,8 @@ def prod( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, @@ -2894,7 +2749,8 @@ def sum( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, @@ -2989,7 +2845,8 @@ def std( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, @@ -3082,7 +2939,8 @@ def var( :ref:`groupby` User guide on groupby operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="var", dim=dim, @@ -3101,91 +2959,6 @@ def var( **kwargs, ) - def median( - self: DataArrayReduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - fill_value=None, - **kwargs, - ) -> T_DataArray: - """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").median(skipna=False) - - See Also - -------- - numpy.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. 
- """ - if OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - class DataArrayResampleReductions: __slots__ = () @@ -3250,7 +3023,8 @@ def count( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, @@ -3323,7 +3097,8 @@ def all( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, @@ -3396,7 +3171,8 @@ def any( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, @@ -3487,7 +3263,8 @@ def max( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, @@ -3580,7 +3357,8 @@ def min( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, @@ -3673,7 +3451,8 @@ def mean( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, @@ -3781,7 +3560,8 @@ def prod( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, @@ -3891,7 +3671,8 @@ def sum( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, @@ -3986,7 +3767,8 @@ def std( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, @@ -4079,7 +3861,8 @@ def var( :ref:`resampling` User guide on resampling operations. """ - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="var", dim=dim, @@ -4097,88 +3880,3 @@ def var( keep_attrs=keep_attrs, **kwargs, ) - - def median( - self: DataArrayReduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - fill_value=None, - **kwargs, - ) -> T_DataArray: - """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").median() - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3M").median(skipna=False) - - See Also - -------- - numpy.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. - """ - if OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index cd5665fb7c5..66b51dbe1bb 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -32,7 +32,13 @@ if sys.version_info >= (3, 8): from typing import Protocol else: - from typing_extensions import Protocol''' + from typing_extensions import Protocol + + +try: + import dask_groupby +except ImportError: + dask_groupby = None''' OBJ_PREAMBLE = """ @@ -79,7 +85,6 @@ class {obj}{cls}Reductions: "sum", "std", "var", - "median", ] NAN_CUM_METHODS = ["cumsum", "cumprod"] MIN_COUNT_METHODS = ["prod", "sum"] @@ -134,7 +139,8 @@ def {method}( :ref:`{docref}` User guide on {docref} operations. 
""" - if OPTIONS["use_numpy_groupies"]: + + if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="{method}", dim=dim, From d238459ae2bbe2a38f855e12ca4752c2fd28051c Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 21:50:30 -0600 Subject: [PATCH 025/138] any,all --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- xarray/core/_reductions.py | 40 +++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 1f4302c0d4b..c65557e5803 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -44,4 +44,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - dask_groupby + - git+https://github.com:dcherian/dask_groupby.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index ee25a48e5fc..cd88d26c8c6 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -48,4 +48,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - dask_groupby + - git+https://github.com:dcherian/dask_groupby.git diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 2f61ff04b8e..b221038da61 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -171,6 +171,12 @@ def all( da (time) bool True True True True True False >>> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True See Also -------- @@ -249,6 +255,12 @@ def any( da (time) bool True True True True True False >>> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True See Also -------- @@ -1167,6 +1179,12 @@ def all( da (time) bool True True True True True False >>> ds.resample(time="3M").all() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False See Also -------- @@ -1245,6 +1263,12 @@ def any( da (time) bool True True True True True False >>> ds.resample(time="3M").any() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True See Also -------- @@ -2167,6 +2191,10 @@ def all( labels (time) >> da.groupby("labels").all() + + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' See Also -------- @@ -2241,6 +2269,10 @@ def any( labels (time) >> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' See Also -------- @@ -3089,6 +3121,10 @@ def all( labels (time) >> da.resample(time="3M").all() + + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 See Also -------- @@ -3163,6 +3199,10 @@ def any( labels (time) >> da.resample(time="3M").any() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 See Also -------- From a2168df2d595c4220a989fb99569196e0951d92f Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 22:09:22 -0600 Subject: [PATCH 026/138] typo again --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index c65557e5803..e42c01c6106 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -44,4 +44,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - git+https://github.com:dcherian/dask_groupby.git + - git+https://github.com/dcherian/dask_groupby.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index cd88d26c8c6..5892f2bf108 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -48,4 +48,4 @@ dependencies: - pip: - numbagg - numpy_groupies - - git+https://github.com:dcherian/dask_groupby.git + - git+https://github.com/dcherian/dask_groupby.git From 6b9a81a6fbe3ba460d905f0e92105d8e25af3ebb Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 7 Nov 2021 20:35:52 -0700 Subject: [PATCH 027/138] Better generator for reductions. --- xarray/core/_reductions.py | 158 ++++------ xarray/util/generate_reductions.py | 443 ++++++++++++++++------------- 2 files changed, 303 insertions(+), 298 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 67fbbd482d0..a2108f54a64 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -26,6 +26,19 @@ def reduce( ... +class DataArrayReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_DataArray: + ... + + class DatasetGroupByReductions: __slots__ = () @@ -42,8 +55,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -114,8 +126,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -186,8 +197,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -259,8 +269,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -348,8 +357,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -437,8 +445,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -527,8 +534,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -634,8 +640,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -740,8 +745,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -829,8 +833,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -918,8 +921,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1010,8 +1012,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1082,8 +1083,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1154,8 +1154,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1227,8 +1226,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1316,8 +1314,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1405,8 +1402,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1495,8 +1491,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1602,8 +1597,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1708,8 +1702,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1797,8 +1790,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1886,8 +1878,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1962,19 +1953,6 @@ def median( ) -class DataArrayReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_DataArray: - ... - - class DataArrayGroupByReductions: __slots__ = () @@ -1991,8 +1969,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2057,8 +2034,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2123,8 +2099,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. 
``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2190,8 +2165,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2271,8 +2245,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2352,8 +2325,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2434,8 +2406,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2531,8 +2502,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2627,8 +2597,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2708,8 +2677,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. 
+ or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2789,8 +2757,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2873,8 +2840,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2939,8 +2905,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3005,8 +2970,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3072,8 +3036,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3153,8 +3116,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3234,8 +3196,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3316,8 +3277,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3413,8 +3373,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3509,8 +3468,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3590,8 +3548,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3671,8 +3628,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 72449195d1e..fbda0b6d6de 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -15,8 +15,6 @@ import collections import textwrap -from functools import partial -from typing import Callable, Optional MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -53,50 +51,10 @@ def reduce( class {obj}{cls}Reductions: __slots__ = ()""" -_SKIPNA_DOCSTRING = """ -skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64).""" - -_MINCOUNT_DOCSTRING = """ -min_count : int, default: None - The required number of valid values to perform the operation. 
If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. Changed in version 0.17.0: if specified on an integer - array and skipna=True, the result will be a float array.""" - - -BOOL_REDUCE_METHODS = ["all", "any"] -NAN_REDUCE_METHODS = [ - "max", - "min", - "mean", - "prod", - "sum", - "std", - "var", - "median", -] -NAN_CUM_METHODS = ["cumsum", "cumprod"] -MIN_COUNT_METHODS = ["prod", "sum"] -NUMERIC_ONLY_METHODS = [ - "mean", - "std", - "var", - "sum", - "prod", - "median", - "cumsum", - "cumprod", -] - -TEMPLATE_REDUCTION = ''' +TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self: {obj}Reduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} + dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} keep_attrs: bool = None, **kwargs, ) -> T_{obj}: @@ -104,193 +62,284 @@ def {method}( Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. {extra_dim}{extra_args}{skip_na.docs}{min_count.docs} - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``{method}`` on this object's data. + ----------''' +TEMPLATE_RETURNS = """ Returns ------- reduced : {obj} New {obj} with ``{method}`` applied to its data and the - indicated dimension(s) removed - - Examples - --------{example} + indicated dimension(s) removed""" +TEMPLATE_SEE_ALSO = ''' See Also -------- numpy.{method} {obj}.{method} :ref:`{docref}` - User guide on {docref} operations. - """ - return self.reduce( - duck_array_ops.{array_method}, - dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} - keep_attrs=keep_attrs, - **kwargs, - )''' + User guide on {docref_description}. + """''' +_DIM_DOCSTRING = """dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" -def generate_groupby_example(obj: str, cls: str, method: str): - """Generate examples for method.""" - dx = "ds" if obj == "Dataset" else "da" - if cls == "Resample": - calculation = f'{dx}.resample(time="3M").{method}' - elif cls == "GroupBy": - calculation = f'{dx}.groupby("labels").{method}' - else: - raise ValueError +_SKIPNA_DOCSTRING = """skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64).""" - if method in BOOL_REDUCE_METHODS: - np_array = """ - ... np.array([True, True, True, True, True, False], dtype=bool),""" +_MINCOUNT_DOCSTRING = """min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array.""" - else: - np_array = """ - ... 
np.array([1, 2, 3, 1, 2, np.nan]),""" +_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes.""" - create_da = f""" - >>> da = xr.DataArray({np_array} - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... )""" +_KWARGS_DOCSTRING = """**kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``{method}`` on this object's data.""" - if obj == "Dataset": - maybe_dataset = """ - >>> ds = xr.Dataset(dict(da=da)) - >>> ds""" - else: - maybe_dataset = """ - >>> da""" +NAN_CUM_METHODS = ["cumsum", "cumprod"] - if method in NAN_REDUCE_METHODS: - maybe_skipna = f""" +NUMERIC_ONLY_METHODS = [ + "cumsum", + "cumprod", +] +extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") +skip_na = extra_kwarg( + docs=_SKIPNA_DOCSTRING, + kwarg="skipna: bool = True,", + call="skipna=skipna,", + example="""\n Use ``skipna`` to control whether NaNs are ignored. - >>> {calculation}(skipna=False)""" - else: - maybe_skipna = "" + >>> {calculation}(skipna=False)""", +) +min_count = extra_kwarg( + docs=_MINCOUNT_DOCSTRING, + kwarg="min_count: Optional[int] = None,", + call="min_count=min_count,", + example="""\n + Specify ``min_count`` for finer control over when NaNs are ignored. - if method in MIN_COUNT_METHODS: - maybe_mincount = f""" + >>> {calculation}(skipna=True, min_count=2)""", +) - Specify ``min_count`` for finer control over when NaNs are ignored. - >>> {calculation}(skipna=True, min_count=2)""" - else: - maybe_mincount = "" +class Method: + def __init__( + self, + name, + bool_reduce=False, + extra_kwargs=tuple(), + numeric_only=False, + ): + self.name = name + self.extra_kwargs = extra_kwargs + self.numeric_only = numeric_only + + if bool_reduce: + self.array_method = f"array_{name}" + self.np_example_array = """ + ... np.array([True, True, True, True, True, False], dtype=bool),""" - return f"""{create_da}{maybe_dataset} + else: + self.array_method = name + self.np_example_array = """ + ... 
np.array([1, 2, 3, 1, 2, np.nan]),""" - >>> {calculation}(){maybe_skipna}{maybe_mincount}""" +class DataStructure: + def __init__(self, name, docstring_create, example_var_name, numeric_only=False): + self.name = name + self.docstring_create = docstring_create + self.example_var_name = example_var_name + self.numeric_only = numeric_only -def generate_method( - obj: str, - docref: str, - method: str, - skipna: bool, - example_generator: Callable, - array_method: Optional[str] = None, -): - if not array_method: - array_method = method - if obj == "Dataset": - if method in NUMERIC_ONLY_METHODS: - numeric_only_call = "\n numeric_only=True," +class ClassReductionGenerator: + def __init__( + self, + cls, + datastructure, + methods, + docref, + docref_description, + example_call_preamble, + ): + self.datastructure = datastructure + self.cls = cls + self.methods = methods + self.docref = docref + self.docref_description = docref_description + self.example_call_preamble = example_call_preamble + self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + + def generate_methods(self): + yield [self.preamble] + for method in self.methods: + yield self.generate_method(method) + + def generate_method(self, method): + template_kwargs = dict(obj=self.datastructure.name, method=method.name) + + if method.extra_kwargs: + extra_kwargs = "\n " + "\n ".join( + [kwarg.kwarg for kwarg in method.extra_kwargs if kwarg.kwarg] + ) else: - numeric_only_call = "\n numeric_only=False," - else: - numeric_only_call = "" - - kwarg = collections.namedtuple("kwarg", "docs kwarg call") - if skipna: - skip_na = kwarg( - docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), - kwarg="\n skipna: bool = True,", - call="\n skipna=skipna,", - ) - else: - skip_na = kwarg(docs="", kwarg="", call="") - - if method in MIN_COUNT_METHODS: - min_count = kwarg( - docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), - kwarg="\n min_count: Optional[int] = None,", - call="\n min_count=min_count,", - ) - else: - min_count = kwarg(docs="", kwarg="", call="") - - return TEMPLATE_REDUCTION.format( - obj=obj, - docref=docref, - method=method, - array_method=array_method, - extra_dim="""If ``None``, will reduce over all dimensions - present in the grouped variable.""", - extra_args="", - skip_na=skip_na, - min_count=min_count, - numeric_only_call=numeric_only_call, - example=example_generator(obj=obj, method=method), - ) - - -def render(obj: str, cls: str, docref: str, example_generator: Callable): - yield CLASS_PREAMBLE.format(obj=obj, cls=cls) - yield generate_method( - obj, - method="count", - docref=docref, - skipna=False, - example_generator=example_generator, - ) - for method in BOOL_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=False, - array_method=f"array_{method}", - example_generator=example_generator, + extra_kwargs = "" + + yield TEMPLATE_REDUCTION_SIGNATURE.format( + **template_kwargs, + extra_kwargs=extra_kwargs, ) - for method in NAN_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=True, - example_generator=example_generator, + + for text in [ + _DIM_DOCSTRING.format(method=method.name), + *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs), + _KEEP_ATTRS_DOCSTRING, + _KWARGS_DOCSTRING.format(method=method.name), + ]: + if text: + yield textwrap.indent(text, 8 * " ") + + yield TEMPLATE_RETURNS.format(**template_kwargs) + + yield textwrap.indent(self.generate_example(method=method), "") + + yield TEMPLATE_SEE_ALSO.format( + 
**template_kwargs, + docref=self.docref, + docref_description=self.docref_description, ) + yield self.generate_code(method) + + def generate_example(self, method): + create_da = f""" + >>> da = xr.DataArray({method.np_example_array} + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... )""" + + calculation = f"{self.datastructure.example_var_name}{self.example_call_preamble}.{method.name}" + if method.extra_kwargs: + extra_examples = "".join( + kwarg.example for kwarg in method.extra_kwargs if kwarg.example + ).format(calculation=calculation, method=method.name) + else: + extra_examples = "" + + return f""" + Examples + --------{create_da}{self.datastructure.docstring_create} + + >>> {calculation}(){extra_examples}""" + + def generate_code(self, method): + extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] + + if self.datastructure.numeric_only: + extra_kwargs.append(f"numeric_only={method.numeric_only},") + + if extra_kwargs: + extra_kwargs = "\n " + "\n ".join(extra_kwargs) + else: + extra_kwargs = "" + return f""" return self.reduce( + duck_array_ops.{method.array_method}, + dim=dim,{extra_kwargs} + keep_attrs=keep_attrs, + **kwargs, + )""" + + +METHODS = ( + Method("count"), + Method("all", bool_reduce=True), + Method("any", bool_reduce=True), + Method("max", extra_kwargs=(skip_na,)), + Method("min", extra_kwargs=(skip_na,)), + Method("mean", extra_kwargs=(skip_na,), numeric_only=True), + Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True), + Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True), + Method("std", extra_kwargs=(skip_na,), numeric_only=True), + Method("var", extra_kwargs=(skip_na,), numeric_only=True), + Method("median", extra_kwargs=(skip_na,), numeric_only=True), +) + +DatasetObject = DataStructure( + name="Dataset", + docstring_create=""" + >>> ds = xr.Dataset(dict(da=da)) + >>> ds""", + example_var_name="ds", + numeric_only=True, +) +DataArrayObject = DataStructure( + name="DataArray", + docstring_create=""" + >>> da""", + example_var_name="da", + numeric_only=False, +) + +DataArrayGroupByGenerator = ClassReductionGenerator( + cls="GroupBy", + datastructure=DataArrayObject, + methods=METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DataArrayResampleGenerator = ClassReductionGenerator( + cls="Resample", + datastructure=DataArrayObject, + methods=METHODS, + docref="resampling", + docref_description="resampling operations", + example_call_preamble='.resample(time="3M")', +) +DatasetGroupByGenerator = ClassReductionGenerator( + cls="GroupBy", + datastructure=DatasetObject, + methods=METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DatasetResampleGenerator = ClassReductionGenerator( + cls="Resample", + datastructure=DatasetObject, + methods=METHODS, + docref="resampling", + docref_description="resampling operations", + example_call_preamble='.resample(time="3M")', +) + if __name__ == "__main__": print(MODULE_PREAMBLE) - for obj in ["Dataset", "DataArray"]: - print(OBJ_PREAMBLE.format(obj=obj)) - for cls, docref in ( - ("GroupBy", "groupby"), - ("Resample", "resampling"), - ): - for line in render( - obj=obj, - cls=cls, - docref=docref, - example_generator=partial(generate_groupby_example, cls=cls), - ): + 
print(OBJ_PREAMBLE.format(obj="Dataset")) + print(OBJ_PREAMBLE.format(obj="DataArray")) + for gen in [ + DatasetGroupByGenerator, + DatasetResampleGenerator, + DataArrayGroupByGenerator, + DataArrayResampleGenerator, + ]: + for lines in gen.generate_methods(): + for line in lines: print(line) From 569c67f28b3e7ff4c475793325a4388220932d02 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 7 Nov 2021 20:54:42 -0700 Subject: [PATCH 028/138] Add ddof for var, std --- xarray/core/_reductions.py | 112 +++++++++++++++++++++++++++++ xarray/util/generate_reductions.py | 17 ++++- 2 files changed, 127 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index a2108f54a64..8b462f893ba 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -735,6 +735,7 @@ def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -751,6 +752,9 @@ def std( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -803,6 +807,16 @@ def std( Data variables: da (labels) float64 nan 0.0 1.0 + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 + See Also -------- numpy.std @@ -814,6 +828,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -823,6 +838,7 @@ def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -839,6 +855,9 @@ def var( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -891,6 +910,16 @@ def var( Data variables: da (labels) float64 nan 0.0 1.0 + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 + See Also -------- numpy.var @@ -902,6 +931,7 @@ def var( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -1692,6 +1722,7 @@ def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1708,6 +1739,9 @@ def std( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1760,6 +1794,16 @@ def std( Data variables: da (time) float64 0.0 0.8165 nan + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan + See Also -------- numpy.std @@ -1771,6 +1815,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -1780,6 +1825,7 @@ def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1796,6 +1842,9 @@ def var( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1848,6 +1897,16 @@ def var( Data variables: da (time) float64 0.0 0.6667 nan + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan + See Also -------- numpy.var @@ -1859,6 +1918,7 @@ def var( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -2587,6 +2647,7 @@ def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2603,6 +2664,9 @@ def std( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -2648,6 +2712,14 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- numpy.std @@ -2659,6 +2731,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) @@ -2667,6 +2740,7 @@ def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2683,6 +2757,9 @@ def var( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2728,6 +2805,14 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- numpy.var @@ -2739,6 +2824,7 @@ def var( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) @@ -3458,6 +3544,7 @@ def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3474,6 +3561,9 @@ def std( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3519,6 +3609,14 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + See Also -------- numpy.std @@ -3530,6 +3628,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) @@ -3538,6 +3637,7 @@ def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, + ddof: int = 0, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3554,6 +3654,9 @@ def var( skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -3599,6 +3702,14 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + See Also -------- numpy.var @@ -3610,6 +3721,7 @@ def var( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index fbda0b6d6de..dda6415262c 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -97,6 +97,10 @@ def {method}( array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array.""" +_DDOF_DOCSTRING = """ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements.""" + _KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -132,6 +136,15 @@ def {method}( >>> {calculation}(skipna=True, min_count=2)""", ) +ddof = extra_kwarg( + docs=_DDOF_DOCSTRING, + kwarg="ddof: int = 0,", + call="ddof=ddof,", + example="""\n + Specify ``ddof=1`` for an unbiased estimate. + + >>> {calculation}(skipna=True, ddof=1)""", +) class Method: @@ -275,8 +288,8 @@ def generate_code(self, method): Method("mean", extra_kwargs=(skip_na,), numeric_only=True), Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True), Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True), - Method("std", extra_kwargs=(skip_na,), numeric_only=True), - Method("var", extra_kwargs=(skip_na,), numeric_only=True), + Method("std", extra_kwargs=(skip_na, ddof), numeric_only=True), + Method("var", extra_kwargs=(skip_na, ddof), numeric_only=True), Method("median", extra_kwargs=(skip_na,), numeric_only=True), ) From 816e7941e47b14280103d5d10da94139f394c0cd Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 7 Nov 2021 20:56:37 -0700 Subject: [PATCH 029/138] Generate DataArray, Dataset reductions too. --- doc/user-guide/computation.rst | 2 + xarray/core/_reductions.py | 1984 +++++++++++++++++++++++++-- xarray/core/arithmetic.py | 2 - xarray/core/dataarray.py | 5 +- xarray/core/dataset.py | 3 +- xarray/tests/test_duck_array_ops.py | 81 -- xarray/util/generate_reductions.py | 44 +- 7 files changed, 1928 insertions(+), 193 deletions(-) diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index fc3c457308f..a4ba606feeb 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -107,6 +107,8 @@ Xarray also provides the ``max_gap`` keyword argument to limit the interpolation data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` for more. +.. _agg: + Aggregation =========== diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 8b462f893ba..1314b6877db 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -39,6 +39,1798 @@ def reduce( ... +class DatasetReductions: + __slots__ = () + + def count( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). 
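The ``DatasetReductions`` and ``DataArrayReductions`` classes in this patch are emitted by ``xarray/util/generate_reductions.py`` above; since its ``__main__`` block simply prints every generated line, regenerating ``xarray/core/_reductions.py`` presumably amounts to running that script and redirecting its stdout. Each generated method is a thin wrapper that forwards to ``reduce`` with the matching ``duck_array_ops`` function. A minimal sketch of that equivalence, assuming NumPy-backed data (the Dataset ``ds`` below is made up for the illustration):

import numpy as np
import xarray as xr
from xarray.core import duck_array_ops  # internal module used by the generated methods

ds = xr.Dataset({"da": ("time", np.array([1.0, 2.0, np.nan]))})
# the generated Dataset.count should be equivalent to calling reduce directly
assert int(ds.count()["da"]) == int(ds.reduce(duck_array_ops.count)["da"]) == 2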
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + + Dimensions: () + Data variables: + da int64 5 + + See Also + -------- + numpy.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False + + See Also + -------- + numpy.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True + + See Also + -------- + numpy.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + See Also + -------- + numpy.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. 
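The boolean reductions (``all``, ``any``) are declared with ``bool_reduce=True`` in the generator, so they dispatch to ``duck_array_ops.array_all`` and ``duck_array_ops.array_any`` rather than to functions named ``all``/``any``. For plain NumPy-backed data these should agree with ``np.all``/``np.any``; a small check using the same example array, for illustration only:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([True, True, True, True, True, False]), dims="time")
# matches the generated docstring examples: all() is False, any() is True
assert bool(da.all()) is False and bool(da.any()) is True
assert bool(np.all(da.values)) is False and bool(np.any(da.values)) is True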
+ """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + + Dimensions: () + Data variables: + da float64 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.min(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + See Also + -------- + numpy.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + + Dimensions: () + Data variables: + da float64 1.8 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.mean(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + See Also + -------- + numpy.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + + Dimensions: () + Data variables: + da float64 12.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.prod(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.prod(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 12.0 + + See Also + -------- + numpy.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. 
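``min_count`` only changes the result when fewer valid (non-NA) values are present than requested. A short illustration with the same example data, assuming NumPy-backed arrays:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([1, 2, 3, 1, 2, np.nan]), dims="time")
# five valid values, so min_count=2 is satisfied and the product is 12
print(float(da.prod(skipna=True, min_count=2)))  # 12.0
# requiring more valid values than are present is expected to give NaN instead
print(float(da.prod(skipna=True, min_count=6)))  # nan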
+ """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 + + See Also + -------- + numpy.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 + + See Also + -------- + numpy.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... 
), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 + + See Also + -------- + numpy.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + See Also + -------- + numpy.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayReductions: + __slots__ = () + + def count( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + + array(5) + + See Also + -------- + numpy.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) + + See Also + -------- + numpy.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
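``keep_attrs`` controls whether the reduced object keeps the original ``attrs``; by default they are dropped. A small sketch (the ``description`` attribute is made up for the example):

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(3.0), dims="x", attrs={"description": "toy data"})
print(da.count(keep_attrs=True).attrs)   # {'description': 'toy data'}
print(da.count(keep_attrs=False).attrs)  # {}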
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + + array(True) + + See Also + -------- + numpy.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.max(skipna=False) + + array(nan) + + See Also + -------- + numpy.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
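``dim`` accepts a single dimension name or a sequence of names; ``dim=None`` reduces over every dimension. A quick sketch on a hypothetical 2-D array:

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([[1.0, 2.0], [3.0, 0.0]]), dims=("x", "y"))
print(da.min(dim="x").values)  # [1. 0.]  reduce along "x" only, "y" remains
print(float(da.min()))         # 0.0      dim=None reduces over all dimensions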
+ skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.min(skipna=False) + + array(nan) + + See Also + -------- + numpy.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.mean(skipna=False) + + array(nan) + + See Also + -------- + numpy.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. 
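``ddof``, added for ``std`` and ``var`` in this series, matches NumPy's: the summed squared deviations are divided by ``N - ddof``. The values in the ``std``/``var`` examples in this module follow directly from the example data once the NaN is dropped:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 1.0, 2.0])  # the docstring example data without the NaN
print(x.var(ddof=0), x.var(ddof=1))  # 0.56  0.7
print(x.std(ddof=0), x.std(ddof=1))  # ~0.7483  ~0.8367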
+ """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.prod(skipna=True, min_count=2) + + array(12.) + + See Also + -------- + numpy.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). 
+ min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.sum(skipna=True, min_count=2) + + array(9.) + + See Also + -------- + numpy.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.std(skipna=True, ddof=1) + + array(0.83666003) + + See Also + -------- + numpy.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.var(skipna=True, ddof=1) + + array(0.7) + + See Also + -------- + numpy.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.median(skipna=False) + + array(nan) + + See Also + -------- + numpy.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + class DatasetGroupByReductions: __slots__ = () @@ -258,7 +2050,7 @@ def any( def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -270,10 +2062,10 @@ def max( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -346,7 +2138,7 @@ def max( def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -358,10 +2150,10 @@ def min( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -434,7 +2226,7 @@ def min( def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -446,10 +2238,10 @@ def mean( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -522,7 +2314,7 @@ def mean( def prod( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -535,10 +2327,10 @@ def prod( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -628,7 +2420,7 @@ def prod( def sum( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -641,10 +2433,10 @@ def sum( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -734,7 +2526,7 @@ def sum( def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -747,10 +2539,10 @@ def std( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -837,7 +2629,7 @@ def std( def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -850,10 +2642,10 @@ def var( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -940,7 +2732,7 @@ def var( def median( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -952,10 +2744,10 @@ def median( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1245,7 +3037,7 @@ def any( def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1257,10 +3049,10 @@ def max( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1333,7 +3125,7 @@ def max( def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1345,10 +3137,10 @@ def min( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
- skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1421,7 +3213,7 @@ def min( def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1433,10 +3225,10 @@ def mean( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1509,7 +3301,7 @@ def mean( def prod( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -1522,10 +3314,10 @@ def prod( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1615,7 +3407,7 @@ def prod( def sum( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -1628,10 +3420,10 @@ def sum( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1721,7 +3513,7 @@ def sum( def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -1734,10 +3526,10 @@ def std( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. 
``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -1824,7 +3616,7 @@ def std( def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -1837,10 +3629,10 @@ def var( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -1927,7 +3719,7 @@ def var( def median( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_Dataset: @@ -1939,10 +3731,10 @@ def median( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2214,7 +4006,7 @@ def any( def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2226,10 +4018,10 @@ def max( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2294,7 +4086,7 @@ def max( def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2306,10 +4098,10 @@ def min( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2374,7 +4166,7 @@ def min( def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2386,10 +4178,10 @@ def mean( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2454,7 +4246,7 @@ def mean( def prod( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -2467,10 +4259,10 @@ def prod( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2550,7 +4342,7 @@ def prod( def sum( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -2563,10 +4355,10 @@ def sum( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2646,7 +4438,7 @@ def sum( def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -2659,10 +4451,10 @@ def std( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -2739,7 +4531,7 @@ def std( def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -2752,10 +4544,10 @@ def var( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -2832,7 +4624,7 @@ def var( def median( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -2844,10 +4636,10 @@ def median( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3111,7 +4903,7 @@ def any( def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3123,10 +4915,10 @@ def max( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
- skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3191,7 +4983,7 @@ def max( def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3203,10 +4995,10 @@ def min( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3271,7 +5063,7 @@ def min( def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3283,10 +5075,10 @@ def mean( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3351,7 +5143,7 @@ def mean( def prod( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -3364,10 +5156,10 @@ def prod( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3447,7 +5239,7 @@ def prod( def sum( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, @@ -3460,10 +5252,10 @@ def sum( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. 
``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3543,7 +5335,7 @@ def sum( def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -3556,10 +5348,10 @@ def std( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -3636,7 +5428,7 @@ def std( def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, @@ -3649,10 +5441,10 @@ def var( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). ddof : int, default: 0 “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, @@ -3729,7 +5521,7 @@ def var( def median( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, **kwargs, ) -> T_DataArray: @@ -3741,10 +5533,10 @@ def median( dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. - skipna : bool, optional + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 814e9a59877..bf8d6ccaeb6 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -105,7 +105,6 @@ class VariableArithmetic( class DatasetArithmetic( ImplementsDatasetReduce, - IncludeReduceMethods, IncludeCumMethods, SupportsArithmetic, DatasetOpsMixin, @@ -116,7 +115,6 @@ class DatasetArithmetic( class DataArrayArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 89f916db7f4..a991eb05e1d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -35,6 +35,7 @@ utils, weighted, ) +from ._reductions import DataArrayReductions from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( @@ -215,7 +216,9 @@ def __setitem__(self, key, value) -> None: _THIS_ARRAY = ReprObject("") -class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): +class DataArray( + AbstractArray, DataWithCoords, DataArrayArithmetic, DataArrayReductions +): """N-dimensional array with labeled coordinates and dimensions. DataArray provides a wrapper around numpy ndarrays that uses diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e882495dce5..e3339f2562f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -50,6 +50,7 @@ utils, weighted, ) +from ._reductions import DatasetReductions from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes @@ -574,7 +575,7 @@ def __setitem__(self, key, value) -> None: self.dataset[pos_indexers] = value -class Dataset(DataWithCoords, DatasetArithmetic, Mapping): +class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): """A multi-dimensional, in memory, array database. A dataset resembles an in-memory representation of a NetCDF file, diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c032a781e47..392597f1bda 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -676,87 +676,6 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_docs(): - # with min_count - actual = DataArray.sum.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `sum` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `sum`. - axis : int or sequence of int, optional - Axis(es) over which to apply `sum`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `sum` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. 
New in version 0.10.8: Added with the default being - None. Changed in version 0.17.0: if specified on an integer array - and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `sum` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - # without min_count - actual = DataArray.std.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `std` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `std`. - axis : int or sequence of int, optional - Axis(es) over which to apply `std`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `std` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `std` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - def test_datetime_to_numeric_datetime64(): times = pd.date_range("2000", periods=5, freq="7D").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index dda6415262c..284cca38603 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -12,9 +12,9 @@ while replacing the doctests. """ - import collections import textwrap +from dataclasses import dataclass MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -84,10 +84,10 @@ def {method}( Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" -_SKIPNA_DOCSTRING = """skipna : bool, optional +_SKIPNA_DOCSTRING = """skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64).""" _MINCOUNT_DOCSTRING = """min_count : int, default: None @@ -120,7 +120,7 @@ def {method}( extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") skip_na = extra_kwarg( docs=_SKIPNA_DOCSTRING, - kwarg="skipna: bool = True,", + kwarg="skipna: bool = None,", call="skipna=skipna,", example="""\n Use ``skipna`` to control whether NaNs are ignored. @@ -170,14 +170,6 @@ def __init__( ... 
np.array([1, 2, 3, 1, 2, np.nan]),""" -class DataStructure: - def __init__(self, name, docstring_create, example_var_name, numeric_only=False): - self.name = name - self.docstring_create = docstring_create - self.example_var_name = example_var_name - self.numeric_only = numeric_only - - class ClassReductionGenerator: def __init__( self, @@ -293,6 +285,15 @@ def generate_code(self, method): Method("median", extra_kwargs=(skip_na,), numeric_only=True), ) + +@dataclass +class DataStructure: + name: str + docstring_create: str + example_var_name: str + numeric_only: bool = False + + DatasetObject = DataStructure( name="Dataset", docstring_create=""" @@ -309,6 +310,23 @@ def generate_code(self, method): numeric_only=False, ) +DatasetGenerator = ClassReductionGenerator( + cls="", + datastructure=DatasetObject, + methods=METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", +) +DataArrayGenerator = ClassReductionGenerator( + cls="", + datastructure=DataArrayObject, + methods=METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", +) + DataArrayGroupByGenerator = ClassReductionGenerator( cls="GroupBy", datastructure=DataArrayObject, @@ -348,6 +366,8 @@ def generate_code(self, method): print(OBJ_PREAMBLE.format(obj="Dataset")) print(OBJ_PREAMBLE.format(obj="DataArray")) for gen in [ + DatasetGenerator, + DataArrayGenerator, DatasetGroupByGenerator, DatasetResampleGenerator, DataArrayGroupByGenerator, From a04ed824a55b757937a4db4aa65729dccf62c1a7 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 12:35:48 -0700 Subject: [PATCH 030/138] Small changes --- xarray/util/generate_reductions.py | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 284cca38603..69d1c7db529 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -75,6 +75,7 @@ def {method}( See Also -------- numpy.{method} + dask.array.{method} {obj}.{method} :ref:`{docref}` User guide on {docref_description}. @@ -108,7 +109,8 @@ def {method}( _KWARGS_DOCSTRING = """**kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating ``{method}`` on this object's data.""" + function for calculating ``{method}`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``.""" NAN_CUM_METHODS = ["cumsum", "cumprod"] @@ -118,7 +120,7 @@ def {method}( ] extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") -skip_na = extra_kwarg( +skipna = extra_kwarg( docs=_SKIPNA_DOCSTRING, kwarg="skipna: bool = None,", call="skipna=skipna,", @@ -271,18 +273,18 @@ def generate_code(self, method): )""" -METHODS = ( +REDUCTION_METHODS = ( Method("count"), Method("all", bool_reduce=True), Method("any", bool_reduce=True), - Method("max", extra_kwargs=(skip_na,)), - Method("min", extra_kwargs=(skip_na,)), - Method("mean", extra_kwargs=(skip_na,), numeric_only=True), - Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True), - Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True), - Method("std", extra_kwargs=(skip_na, ddof), numeric_only=True), - Method("var", extra_kwargs=(skip_na, ddof), numeric_only=True), - Method("median", extra_kwargs=(skip_na,), numeric_only=True), + Method("max", extra_kwargs=(skipna,)), + Method("min", extra_kwargs=(skipna,)), + Method("mean", extra_kwargs=(skipna,), numeric_only=True), + Method("prod", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("sum", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("std", extra_kwargs=(skipna, ddof), numeric_only=True), + Method("var", extra_kwargs=(skipna, ddof), numeric_only=True), + Method("median", extra_kwargs=(skipna,), numeric_only=True), ) @@ -313,7 +315,7 @@ class DataStructure: DatasetGenerator = ClassReductionGenerator( cls="", datastructure=DatasetObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", @@ -321,7 +323,7 @@ class DataStructure: DataArrayGenerator = ClassReductionGenerator( cls="", datastructure=DataArrayObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", @@ -330,7 +332,7 @@ class DataStructure: DataArrayGroupByGenerator = ClassReductionGenerator( cls="GroupBy", datastructure=DataArrayObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', @@ -338,7 +340,7 @@ class DataStructure: DataArrayResampleGenerator = ClassReductionGenerator( cls="Resample", datastructure=DataArrayObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', @@ -346,7 +348,7 @@ class DataStructure: DatasetGroupByGenerator = ClassReductionGenerator( cls="GroupBy", datastructure=DatasetObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', @@ -354,7 +356,7 @@ class DataStructure: DatasetResampleGenerator = ClassReductionGenerator( cls="Resample", datastructure=DatasetObject, - methods=METHODS, + methods=REDUCTION_METHODS, docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', From 7f39cc0d8c664e3fcf354536ed3a95882064b4b6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 12:39:00 -0700 Subject: [PATCH 031/138] Minor docstring improvements. 
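The ``split_every`` sentence added to these docstrings reflects that reduction ``**kwargs`` are handed straight to the underlying array function, so dask-backed objects can receive dask-specific options. A minimal sketch of the intended usage, assuming a dask-backed DataArray (the data and chunk sizes below are illustrative only, not taken from this series):

    import numpy as np
    import xarray as xr

    # Hypothetical dask-backed DataArray; the chunk size is arbitrary.
    da = xr.DataArray(np.arange(1000.0), dims="x").chunk({"x": 100})

    # ``split_every`` is not an xarray argument: it rides along in **kwargs
    # and controls how many chunks dask combines per level of its tree
    # reduction.
    result = da.sum(dim="x", split_every=4)
    print(result.compute())
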
--- xarray/core/_reductions.py | 176 +++++++++++++++++++++++++---- xarray/util/generate_reductions.py | 10 +- 2 files changed, 163 insertions(+), 23 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 1314b6877db..1d6d4f88970 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -63,6 +63,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -99,7 +100,8 @@ def count( See Also -------- numpy.count - Dataset.count + dask.array.count + DataArray.count :ref:`agg` User guide on reduction or aggregation operations. """ @@ -132,6 +134,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -168,7 +171,8 @@ def all( See Also -------- numpy.all - Dataset.all + dask.array.all + DataArray.all :ref:`agg` User guide on reduction or aggregation operations. """ @@ -201,6 +205,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -237,7 +242,8 @@ def any( See Also -------- numpy.any - Dataset.any + dask.array.any + DataArray.any :ref:`agg` User guide on reduction or aggregation operations. """ @@ -276,6 +282,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -320,7 +327,8 @@ def max( See Also -------- numpy.max - Dataset.max + dask.array.max + DataArray.max :ref:`agg` User guide on reduction or aggregation operations. """ @@ -360,6 +368,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -404,7 +413,8 @@ def min( See Also -------- numpy.min - Dataset.min + dask.array.min + DataArray.min :ref:`agg` User guide on reduction or aggregation operations. """ @@ -444,6 +454,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -488,7 +499,8 @@ def mean( See Also -------- numpy.mean - Dataset.mean + dask.array.mean + DataArray.mean :ref:`agg` User guide on reduction or aggregation operations. """ @@ -535,6 +547,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -587,7 +600,8 @@ def prod( See Also -------- numpy.prod - Dataset.prod + dask.array.prod + DataArray.prod :ref:`agg` User guide on reduction or aggregation operations. """ @@ -635,6 +649,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -687,7 +702,8 @@ def sum( See Also -------- numpy.sum - Dataset.sum + dask.array.sum + DataArray.sum :ref:`agg` User guide on reduction or aggregation operations. """ @@ -732,6 +748,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -784,7 +801,8 @@ def std( See Also -------- numpy.std - Dataset.std + dask.array.std + DataArray.std :ref:`agg` User guide on reduction or aggregation operations. """ @@ -829,6 +847,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -881,7 +900,8 @@ def var( See Also -------- numpy.var - Dataset.var + dask.array.var + DataArray.var :ref:`agg` User guide on reduction or aggregation operations. """ @@ -922,6 +942,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -966,7 +987,8 @@ def median( See Also -------- numpy.median - Dataset.median + dask.array.median + DataArray.median :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1004,6 +1026,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1035,7 +1058,8 @@ def count( See Also -------- numpy.count - DataArray.count + dask.array.count + Dataset.count :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1067,6 +1091,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1098,7 +1123,8 @@ def all( See Also -------- numpy.all - DataArray.all + dask.array.all + Dataset.all :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1130,6 +1156,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1161,7 +1188,8 @@ def any( See Also -------- numpy.any - DataArray.any + dask.array.any + Dataset.any :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1199,6 +1227,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1236,7 +1265,8 @@ def max( See Also -------- numpy.max - DataArray.max + dask.array.max + Dataset.max :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1275,6 +1305,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -1312,7 +1343,8 @@ def min( See Also -------- numpy.min - DataArray.min + dask.array.min + Dataset.min :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1351,6 +1383,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1388,7 +1421,8 @@ def mean( See Also -------- numpy.mean - DataArray.mean + dask.array.mean + Dataset.mean :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1434,6 +1468,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1477,7 +1512,8 @@ def prod( See Also -------- numpy.prod - DataArray.prod + dask.array.prod + Dataset.prod :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1524,6 +1560,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1567,7 +1604,8 @@ def sum( See Also -------- numpy.sum - DataArray.sum + dask.array.sum + Dataset.sum :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1611,6 +1649,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1654,7 +1693,8 @@ def std( See Also -------- numpy.std - DataArray.std + dask.array.std + Dataset.std :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1698,6 +1738,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1741,7 +1782,8 @@ def var( See Also -------- numpy.var - DataArray.var + dask.array.var + Dataset.var :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1781,6 +1823,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1818,7 +1861,8 @@ def median( See Also -------- numpy.median - DataArray.median + dask.array.median + Dataset.median :ref:`agg` User guide on reduction or aggregation operations. """ @@ -1855,6 +1899,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1893,6 +1938,7 @@ def count( See Also -------- numpy.count + dask.array.count Dataset.count :ref:`groupby` User guide on groupby operations. @@ -1926,6 +1972,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1964,6 +2011,7 @@ def all( See Also -------- numpy.all + dask.array.all Dataset.all :ref:`groupby` User guide on groupby operations. 
@@ -1997,6 +2045,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2035,6 +2084,7 @@ def any( See Also -------- numpy.any + dask.array.any Dataset.any :ref:`groupby` User guide on groupby operations. @@ -2074,6 +2124,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2122,6 +2173,7 @@ def max( See Also -------- numpy.max + dask.array.max Dataset.max :ref:`groupby` User guide on groupby operations. @@ -2162,6 +2214,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2210,6 +2263,7 @@ def min( See Also -------- numpy.min + dask.array.min Dataset.min :ref:`groupby` User guide on groupby operations. @@ -2250,6 +2304,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2298,6 +2353,7 @@ def mean( See Also -------- numpy.mean + dask.array.mean Dataset.mean :ref:`groupby` User guide on groupby operations. @@ -2345,6 +2401,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2403,6 +2460,7 @@ def prod( See Also -------- numpy.prod + dask.array.prod Dataset.prod :ref:`groupby` User guide on groupby operations. @@ -2451,6 +2509,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2509,6 +2568,7 @@ def sum( See Also -------- numpy.sum + dask.array.sum Dataset.sum :ref:`groupby` User guide on groupby operations. @@ -2554,6 +2614,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2612,6 +2673,7 @@ def std( See Also -------- numpy.std + dask.array.std Dataset.std :ref:`groupby` User guide on groupby operations. @@ -2657,6 +2719,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2715,6 +2778,7 @@ def var( See Also -------- numpy.var + dask.array.var Dataset.var :ref:`groupby` User guide on groupby operations. @@ -2756,6 +2820,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2804,6 +2869,7 @@ def median( See Also -------- numpy.median + dask.array.median Dataset.median :ref:`groupby` User guide on groupby operations. 
@@ -2842,6 +2908,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2880,6 +2947,7 @@ def count( See Also -------- numpy.count + dask.array.count Dataset.count :ref:`resampling` User guide on resampling operations. @@ -2913,6 +2981,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2951,6 +3020,7 @@ def all( See Also -------- numpy.all + dask.array.all Dataset.all :ref:`resampling` User guide on resampling operations. @@ -2984,6 +3054,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3022,6 +3093,7 @@ def any( See Also -------- numpy.any + dask.array.any Dataset.any :ref:`resampling` User guide on resampling operations. @@ -3061,6 +3133,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3109,6 +3182,7 @@ def max( See Also -------- numpy.max + dask.array.max Dataset.max :ref:`resampling` User guide on resampling operations. @@ -3149,6 +3223,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3197,6 +3272,7 @@ def min( See Also -------- numpy.min + dask.array.min Dataset.min :ref:`resampling` User guide on resampling operations. @@ -3237,6 +3313,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3285,6 +3362,7 @@ def mean( See Also -------- numpy.mean + dask.array.mean Dataset.mean :ref:`resampling` User guide on resampling operations. @@ -3332,6 +3410,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3390,6 +3469,7 @@ def prod( See Also -------- numpy.prod + dask.array.prod Dataset.prod :ref:`resampling` User guide on resampling operations. @@ -3438,6 +3518,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3496,6 +3577,7 @@ def sum( See Also -------- numpy.sum + dask.array.sum Dataset.sum :ref:`resampling` User guide on resampling operations. @@ -3541,6 +3623,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3599,6 +3682,7 @@ def std( See Also -------- numpy.std + dask.array.std Dataset.std :ref:`resampling` User guide on resampling operations. 
@@ -3644,6 +3728,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3702,6 +3787,7 @@ def var( See Also -------- numpy.var + dask.array.var Dataset.var :ref:`resampling` User guide on resampling operations. @@ -3743,6 +3829,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3791,6 +3878,7 @@ def median( See Also -------- numpy.median + dask.array.median Dataset.median :ref:`resampling` User guide on resampling operations. @@ -3829,6 +3917,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3862,6 +3951,7 @@ def count( See Also -------- numpy.count + dask.array.count DataArray.count :ref:`groupby` User guide on groupby operations. @@ -3894,6 +3984,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3927,6 +4018,7 @@ def all( See Also -------- numpy.all + dask.array.all DataArray.all :ref:`groupby` User guide on groupby operations. @@ -3959,6 +4051,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3992,6 +4085,7 @@ def any( See Also -------- numpy.any + dask.array.any DataArray.any :ref:`groupby` User guide on groupby operations. @@ -4030,6 +4124,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4071,6 +4166,7 @@ def max( See Also -------- numpy.max + dask.array.max DataArray.max :ref:`groupby` User guide on groupby operations. @@ -4110,6 +4206,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4151,6 +4248,7 @@ def min( See Also -------- numpy.min + dask.array.min DataArray.min :ref:`groupby` User guide on groupby operations. @@ -4190,6 +4288,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4231,6 +4330,7 @@ def mean( See Also -------- numpy.mean + dask.array.mean DataArray.mean :ref:`groupby` User guide on groupby operations. @@ -4277,6 +4377,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4326,6 +4427,7 @@ def prod( See Also -------- numpy.prod + dask.array.prod DataArray.prod :ref:`groupby` User guide on groupby operations. 
@@ -4373,6 +4475,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4422,6 +4525,7 @@ def sum( See Also -------- numpy.sum + dask.array.sum DataArray.sum :ref:`groupby` User guide on groupby operations. @@ -4466,6 +4570,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4515,6 +4620,7 @@ def std( See Also -------- numpy.std + dask.array.std DataArray.std :ref:`groupby` User guide on groupby operations. @@ -4559,6 +4665,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4608,6 +4715,7 @@ def var( See Also -------- numpy.var + dask.array.var DataArray.var :ref:`groupby` User guide on groupby operations. @@ -4648,6 +4756,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4689,6 +4798,7 @@ def median( See Also -------- numpy.median + dask.array.median DataArray.median :ref:`groupby` User guide on groupby operations. @@ -4726,6 +4836,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4759,6 +4870,7 @@ def count( See Also -------- numpy.count + dask.array.count DataArray.count :ref:`resampling` User guide on resampling operations. @@ -4791,6 +4903,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4824,6 +4937,7 @@ def all( See Also -------- numpy.all + dask.array.all DataArray.all :ref:`resampling` User guide on resampling operations. @@ -4856,6 +4970,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4889,6 +5004,7 @@ def any( See Also -------- numpy.any + dask.array.any DataArray.any :ref:`resampling` User guide on resampling operations. @@ -4927,6 +5043,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -4968,6 +5085,7 @@ def max( See Also -------- numpy.max + dask.array.max DataArray.max :ref:`resampling` User guide on resampling operations. @@ -5007,6 +5125,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5048,6 +5167,7 @@ def min( See Also -------- numpy.min + dask.array.min DataArray.min :ref:`resampling` User guide on resampling operations. 
@@ -5087,6 +5207,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5128,6 +5249,7 @@ def mean( See Also -------- numpy.mean + dask.array.mean DataArray.mean :ref:`resampling` User guide on resampling operations. @@ -5174,6 +5296,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5223,6 +5346,7 @@ def prod( See Also -------- numpy.prod + dask.array.prod DataArray.prod :ref:`resampling` User guide on resampling operations. @@ -5270,6 +5394,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5319,6 +5444,7 @@ def sum( See Also -------- numpy.sum + dask.array.sum DataArray.sum :ref:`resampling` User guide on resampling operations. @@ -5363,6 +5489,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5412,6 +5539,7 @@ def std( See Also -------- numpy.std + dask.array.std DataArray.std :ref:`resampling` User guide on resampling operations. @@ -5456,6 +5584,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5505,6 +5634,7 @@ def var( See Also -------- numpy.var + dask.array.var DataArray.var :ref:`resampling` User guide on resampling operations. @@ -5545,6 +5675,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -5586,6 +5717,7 @@ def median( See Also -------- numpy.median + dask.array.median DataArray.median :ref:`resampling` User guide on resampling operations. diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 69d1c7db529..f35f7d99847 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -76,7 +76,7 @@ def {method}( -------- numpy.{method} dask.array.{method} - {obj}.{method} + {see_also_obj}.{method} :ref:`{docref}` User guide on {docref_description}. 
"""''' @@ -181,6 +181,7 @@ def __init__( docref, docref_description, example_call_preamble, + see_also_obj=None, ): self.datastructure = datastructure self.cls = cls @@ -189,6 +190,10 @@ def __init__( self.docref_description = docref_description self.example_call_preamble = example_call_preamble self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + if not see_also_obj: + self.see_also_obj = self.datastructure.name + else: + self.see_also_obj = see_also_obj def generate_methods(self): yield [self.preamble] @@ -227,6 +232,7 @@ def generate_method(self, method): **template_kwargs, docref=self.docref, docref_description=self.docref_description, + see_also_obj=self.see_also_obj, ) yield self.generate_code(method) @@ -319,6 +325,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", + see_also_obj="DataArray", ) DataArrayGenerator = ClassReductionGenerator( cls="", @@ -327,6 +334,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", + see_also_obj="Dataset", ) DataArrayGroupByGenerator = ClassReductionGenerator( From 99bfe128066ec3ef1b297650a47e2dd0a45801a8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 12:44:23 -0700 Subject: [PATCH 032/138] Fixes #5898 --- xarray/core/_reductions.py | 144 +++++++++++++++++++++++++++++ xarray/util/generate_reductions.py | 9 ++ 2 files changed, 153 insertions(+) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 1d6d4f88970..458910ac308 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -496,6 +496,10 @@ def mean( Data variables: da float64 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -597,6 +601,10 @@ def prod( Data variables: da float64 12.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -699,6 +707,10 @@ def sum( Data variables: da float64 9.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -798,6 +810,10 @@ def std( Data variables: da float64 0.8367 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -897,6 +913,10 @@ def var( Data variables: da float64 0.7 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -984,6 +1004,10 @@ def median( Data variables: da float64 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.median @@ -1418,6 +1442,10 @@ def mean( array(nan) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -1509,6 +1537,10 @@ def prod( array(12.) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -1601,6 +1633,10 @@ def sum( array(9.) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -1690,6 +1726,10 @@ def std( array(0.83666003) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -1779,6 +1819,10 @@ def var( array(0.7) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -1858,6 +1902,10 @@ def median( array(nan) + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ See Also -------- numpy.median @@ -2350,6 +2398,10 @@ def mean( Data variables: da (labels) float64 nan 2.0 2.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -2457,6 +2509,10 @@ def prod( Data variables: da (labels) float64 nan 4.0 3.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -2565,6 +2621,10 @@ def sum( Data variables: da (labels) float64 nan 4.0 4.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -2670,6 +2730,10 @@ def std( Data variables: da (labels) float64 nan 0.0 1.414 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -2775,6 +2839,10 @@ def var( Data variables: da (labels) float64 nan 0.0 2.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -2866,6 +2934,10 @@ def median( Data variables: da (labels) float64 nan 2.0 2.0 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.median @@ -3359,6 +3431,10 @@ def mean( Data variables: da (time) float64 1.0 2.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -3466,6 +3542,10 @@ def prod( Data variables: da (time) float64 nan 6.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -3574,6 +3654,10 @@ def sum( Data variables: da (time) float64 nan 6.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -3679,6 +3763,10 @@ def std( Data variables: da (time) float64 nan 1.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -3784,6 +3872,10 @@ def var( Data variables: da (time) float64 nan 1.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -3875,6 +3967,10 @@ def median( Data variables: da (time) float64 1.0 2.0 nan + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.median @@ -4327,6 +4423,10 @@ def mean( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -4424,6 +4524,10 @@ def prod( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -4522,6 +4626,10 @@ def sum( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -4617,6 +4725,10 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -4712,6 +4824,10 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -4795,6 +4911,10 @@ def median( Coordinates: * labels (labels) object 'a' 'b' 'c' + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ See Also -------- numpy.median @@ -5246,6 +5366,10 @@ def mean( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.mean @@ -5343,6 +5467,10 @@ def prod( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.prod @@ -5441,6 +5569,10 @@ def sum( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.sum @@ -5536,6 +5668,10 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.std @@ -5631,6 +5767,10 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.var @@ -5714,6 +5854,10 @@ def median( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Notes + ----- + Non-numeric variables will be removed prior to reducing. + See Also -------- numpy.median diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index f35f7d99847..c572a96dcf4 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -81,6 +81,11 @@ def {method}( User guide on {docref_description}. """''' +TEMPLATE_NOTES = """ + Notes + ----- + {notes}""" + _DIM_DOCSTRING = """dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" @@ -118,6 +123,7 @@ def {method}( "cumsum", "cumprod", ] +_NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") skipna = extra_kwarg( @@ -228,6 +234,9 @@ def generate_method(self, method): yield textwrap.indent(self.generate_example(method=method), "") + if method.numeric_only: + yield TEMPLATE_NOTES.format(notes=_NUMERIC_ONLY_NOTES) + yield TEMPLATE_SEE_ALSO.format( **template_kwargs, docref=self.docref, From 9bb2c321e8df2f5978c40a1c3c8f891c77e847ff Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 13:56:53 -0700 Subject: [PATCH 033/138] Reorder docstring to match numpy --- xarray/util/generate_reductions.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index c572a96dcf4..f9a2d5e2db9 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -71,15 +71,14 @@ def {method}( New {obj} with ``{method}`` applied to its data and the indicated dimension(s) removed""" -TEMPLATE_SEE_ALSO = ''' +TEMPLATE_SEE_ALSO = """ See Also -------- numpy.{method} dask.array.{method} {see_also_obj}.{method} :ref:`{docref}` - User guide on {docref_description}. 
- """''' + User guide on {docref_description}.""" TEMPLATE_NOTES = """ Notes @@ -232,11 +231,6 @@ def generate_method(self, method): yield TEMPLATE_RETURNS.format(**template_kwargs) - yield textwrap.indent(self.generate_example(method=method), "") - - if method.numeric_only: - yield TEMPLATE_NOTES.format(notes=_NUMERIC_ONLY_NOTES) - yield TEMPLATE_SEE_ALSO.format( **template_kwargs, docref=self.docref, @@ -244,6 +238,13 @@ def generate_method(self, method): see_also_obj=self.see_also_obj, ) + if method.numeric_only: + yield TEMPLATE_NOTES.format(notes=_NUMERIC_ONLY_NOTES) + + yield textwrap.indent(self.generate_example(method=method), "") + + yield ' """' + yield self.generate_code(method) def generate_example(self, method): From dea8fd9f326a543807c26b6a62e84b28b5cb4cc3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 16:18:07 -0700 Subject: [PATCH 034/138] REfactor --- xarray/core/_reductions.py | 1344 ++++++++++++++-------------- xarray/util/generate_reductions.py | 16 +- 2 files changed, 681 insertions(+), 679 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 458910ac308..006f4e35a09 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -71,6 +71,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -96,14 +104,6 @@ def count( Dimensions: () Data variables: da int64 5 - - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.count, @@ -142,6 +142,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -167,14 +175,6 @@ def all( Dimensions: () Data variables: da bool False - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.array_all, @@ -213,6 +213,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -238,14 +246,6 @@ def any( Dimensions: () Data variables: da bool True - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.array_any, @@ -290,6 +290,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -323,14 +331,6 @@ def max( Dimensions: () Data variables: da float64 nan - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`agg` - User guide on reduction or aggregation operations. 
""" return self.reduce( duck_array_ops.max, @@ -376,6 +376,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -409,14 +417,6 @@ def min( Dimensions: () Data variables: da float64 nan - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.min, @@ -462,6 +462,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -495,18 +507,6 @@ def mean( Dimensions: () Data variables: da float64 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.mean, @@ -559,6 +559,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -600,18 +612,6 @@ def prod( Dimensions: () Data variables: da float64 12.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.prod, @@ -665,6 +665,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -706,18 +718,6 @@ def sum( Dimensions: () Data variables: da float64 9.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.sum, @@ -768,6 +768,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -809,18 +821,6 @@ def std( Dimensions: () Data variables: da float64 0.8367 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`agg` - User guide on reduction or aggregation operations. 
""" return self.reduce( duck_array_ops.std, @@ -871,6 +871,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -912,18 +924,6 @@ def var( Dimensions: () Data variables: da float64 0.7 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.var, @@ -970,6 +970,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1003,18 +1015,6 @@ def median( Dimensions: () Data variables: da float64 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.median, @@ -1058,6 +1058,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -1078,14 +1086,6 @@ def count( >>> da.count() array(5) - - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.count, @@ -1123,6 +1123,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -1143,14 +1151,6 @@ def all( >>> da.all() array(False) - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.array_all, @@ -1188,6 +1188,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -1208,14 +1216,6 @@ def any( >>> da.any() array(True) - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.array_any, @@ -1259,6 +1259,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -1285,14 +1293,6 @@ def max( >>> da.max(skipna=False) array(nan) - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.max, @@ -1337,6 +1337,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -1363,14 +1371,6 @@ def min( >>> da.min(skipna=False) array(nan) - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.min, @@ -1415,6 +1415,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1441,18 +1453,6 @@ def mean( >>> da.mean(skipna=False) array(nan) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.mean, @@ -1504,6 +1504,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1536,18 +1548,6 @@ def prod( >>> da.prod(skipna=True, min_count=2) array(12.) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.prod, @@ -1600,6 +1600,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1632,18 +1644,6 @@ def sum( >>> da.sum(skipna=True, min_count=2) array(9.) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.sum, @@ -1693,6 +1693,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1725,18 +1737,6 @@ def std( >>> da.std(skipna=True, ddof=1) array(0.83666003) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. 
- - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.std, @@ -1786,6 +1786,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1818,18 +1830,6 @@ def var( >>> da.var(skipna=True, ddof=1) array(0.7) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.var, @@ -1875,6 +1875,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1901,18 +1913,6 @@ def median( >>> da.median(skipna=False) array(nan) - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`agg` - User guide on reduction or aggregation operations. """ return self.reduce( duck_array_ops.median, @@ -1955,6 +1955,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -1982,14 +1990,6 @@ def count( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) int64 1 2 2 - - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -2028,6 +2028,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2055,14 +2063,6 @@ def all( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool False True True - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -2101,6 +2101,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2128,14 +2136,6 @@ def any( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool True True True - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -2180,6 +2180,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -2217,14 +2225,6 @@ def max( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 3.0 - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -2270,6 +2270,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2307,14 +2315,6 @@ def min( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 1.0 - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -2360,6 +2360,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2397,18 +2409,6 @@ def mean( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -2461,6 +2461,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2508,18 +2520,6 @@ def prod( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 3.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -2573,6 +2573,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2620,18 +2632,6 @@ def sum( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 4.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -2682,6 +2682,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ Examples -------- >>> da = xr.DataArray( @@ -2729,18 +2741,6 @@ def std( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 0.0 1.414 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.std, @@ -2791,6 +2791,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2838,18 +2850,6 @@ def var( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 0.0 2.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.var, @@ -2896,6 +2896,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2933,18 +2945,6 @@ def median( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -2988,6 +2988,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3015,14 +3023,6 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) int64 1 3 1 - - See Also - -------- - numpy.count - dask.array.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.count, @@ -3061,6 +3061,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3088,14 +3096,6 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True False - - See Also - -------- - numpy.all - dask.array.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -3134,6 +3134,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -3161,14 +3169,6 @@ def any( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True True - - See Also - -------- - numpy.any - dask.array.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_any, @@ -3213,6 +3213,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3250,14 +3258,6 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 3.0 nan - - See Also - -------- - numpy.max - dask.array.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -3303,6 +3303,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3340,14 +3348,6 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 1.0 nan - - See Also - -------- - numpy.min - dask.array.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -3393,6 +3393,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3430,18 +3442,6 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, @@ -3494,6 +3494,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3541,18 +3553,6 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, @@ -3606,6 +3606,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ Examples -------- >>> da = xr.DataArray( @@ -3653,18 +3665,6 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -3715,6 +3715,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3762,18 +3774,6 @@ def std( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 1.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.std - dask.array.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.std, @@ -3824,6 +3824,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3871,18 +3883,6 @@ def var( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 1.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.var, @@ -3929,6 +3929,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3966,18 +3978,6 @@ def median( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.median, @@ -4021,6 +4021,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -4043,14 +4051,6 @@ def count( array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -4088,6 +4088,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -4110,14 +4118,6 @@ def all( array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -4155,6 +4155,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -4177,14 +4185,6 @@ def any( array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -4228,6 +4228,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -4258,14 +4266,6 @@ def max( array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -4310,6 +4310,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -4340,14 +4348,6 @@ def min( array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -4392,6 +4392,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4422,18 +4434,6 @@ def mean( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -4485,6 +4485,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4523,18 +4535,6 @@ def prod( array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. 
""" return self.reduce( duck_array_ops.prod, @@ -4587,6 +4587,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4625,18 +4637,6 @@ def sum( array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -4686,6 +4686,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4724,18 +4736,6 @@ def std( array([ nan, 0. , 1.41421356]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.std, @@ -4785,6 +4785,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4823,18 +4835,6 @@ def var( array([nan, 0., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.var, @@ -4880,6 +4880,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -4910,18 +4922,6 @@ def median( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -4964,6 +4964,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -4986,14 +4994,6 @@ def count( array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.count - dask.array.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. 
""" return self.reduce( duck_array_ops.count, @@ -5031,6 +5031,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -5053,14 +5061,6 @@ def all( array([ True, True, False]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.all - dask.array.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -5098,6 +5098,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -5120,14 +5128,6 @@ def any( array([ True, True, True]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.any - dask.array.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_any, @@ -5171,6 +5171,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -5201,14 +5209,6 @@ def max( array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.max - dask.array.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -5253,6 +5253,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -5283,14 +5291,6 @@ def min( array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.min - dask.array.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -5335,6 +5335,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -5365,18 +5377,6 @@ def mean( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.mean - dask.array.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, @@ -5428,6 +5428,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ Examples -------- >>> da = xr.DataArray( @@ -5466,18 +5478,6 @@ def prod( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.prod - dask.array.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, @@ -5530,6 +5530,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -5568,18 +5580,6 @@ def sum( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.sum - dask.array.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -5629,6 +5629,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -5667,18 +5679,6 @@ def std( array([nan, 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.std - dask.array.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.std, @@ -5728,6 +5728,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -5766,18 +5778,6 @@ def var( array([nan, 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.var - dask.array.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.var, @@ -5823,6 +5823,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -5853,18 +5865,6 @@ def median( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Notes - ----- - Non-numeric variables will be removed prior to reducing. - - See Also - -------- - numpy.median - dask.array.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. 
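None of the docstrings above are edited by hand; they are emitted by ``xarray/util/generate_reductions.py``, which the next patches rename and then rewrite. As a rough sketch of how the rewritten generator is driven (names such as ``GenericReductionGenerator``, ``Method``, ``DatasetObject``, ``skipna`` and ``min_count`` are the ones defined in that file, shown in the diffs below), regenerating a mixin is a matter of instantiating a generator and printing what it yields:

    # Sketch only: mirrors the __main__ driver of generate_reductions.py, but
    # with a single method so the output stays small.
    gen = GenericReductionGenerator(
        cls="GroupBy",
        datastructure=DatasetObject,
        methods=(Method("sum", extra_kwargs=(skipna, min_count), numeric_only=True),),
        docref="groupby",
        docref_description="groupby operations",
        example_call_preamble='.groupby("labels")',
    )
    # generate_methods() yields groups of text lines: the class preamble first,
    # then one group per reduction method; printing them gives a one-method
    # version of the DatasetGroupByReductions source.
    for lines in gen.generate_methods():
        for line in lines:
            print(line)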
""" return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index f9a2d5e2db9..fcb5a573c96 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -177,7 +177,7 @@ def __init__( ... np.array([1, 2, 3, 1, 2, np.nan]),""" -class ClassReductionGenerator: +class ReductionGenerator: def __init__( self, cls, @@ -271,6 +271,8 @@ def generate_example(self, method): >>> {calculation}(){extra_examples}""" + +class GenericReductionGenerator(ReductionGenerator): def generate_code(self, method): extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] @@ -328,7 +330,7 @@ class DataStructure: numeric_only=False, ) -DatasetGenerator = ClassReductionGenerator( +DatasetGenerator = GenericReductionGenerator( cls="", datastructure=DatasetObject, methods=REDUCTION_METHODS, @@ -337,7 +339,7 @@ class DataStructure: example_call_preamble="", see_also_obj="DataArray", ) -DataArrayGenerator = ClassReductionGenerator( +DataArrayGenerator = GenericReductionGenerator( cls="", datastructure=DataArrayObject, methods=REDUCTION_METHODS, @@ -347,7 +349,7 @@ class DataStructure: see_also_obj="Dataset", ) -DataArrayGroupByGenerator = ClassReductionGenerator( +DataArrayGroupByGenerator = GenericReductionGenerator( cls="GroupBy", datastructure=DataArrayObject, methods=REDUCTION_METHODS, @@ -355,7 +357,7 @@ class DataStructure: docref_description="groupby operations", example_call_preamble='.groupby("labels")', ) -DataArrayResampleGenerator = ClassReductionGenerator( +DataArrayResampleGenerator = GenericReductionGenerator( cls="Resample", datastructure=DataArrayObject, methods=REDUCTION_METHODS, @@ -363,7 +365,7 @@ class DataStructure: docref_description="resampling operations", example_call_preamble='.resample(time="3M")', ) -DatasetGroupByGenerator = ClassReductionGenerator( +DatasetGroupByGenerator = GenericReductionGenerator( cls="GroupBy", datastructure=DatasetObject, methods=REDUCTION_METHODS, @@ -371,7 +373,7 @@ class DataStructure: docref_description="groupby operations", example_call_preamble='.groupby("labels")', ) -DatasetResampleGenerator = ClassReductionGenerator( +DatasetResampleGenerator = GenericReductionGenerator( cls="Resample", datastructure=DatasetObject, methods=REDUCTION_METHODS, From f06e6a7d5ec6575df9ac6772c6092dfa81980ae8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 16:24:11 -0700 Subject: [PATCH 035/138] Revert "Separate out median" This reverts commit 932b9a5d668278019bbd75ea23582ff28a463b91. 
--- xarray/core/groupby.py | 172 --------------------------------------- xarray/core/resample.py | 173 ---------------------------------------- 2 files changed, 345 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 894127038f5..24656104138 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,6 +1,5 @@ import datetime import warnings -from typing import Hashable, Sequence, Union import numpy as np import pandas as pd @@ -13,7 +12,6 @@ from .indexes import propagate_indexes from .options import _get_keep_attrs from .pycompat import integer_types -from .types import T_DataArray, T_Dataset from .utils import ( either_dict_or_kwargs, hashable, @@ -945,87 +943,6 @@ def reduce_array(ar): class DataArrayGroupBy(DataArrayGroupByBase, DataArrayGroupByReductions): __slots__ = () - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - **kwargs, - ) -> T_DataArray: - """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.groupby("labels").median(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - class DatasetGroupByBase(GroupBy, DatasetGroupbyArithmetic): @@ -1148,92 +1065,3 @@ def assign(self, **kwargs): class DatasetGroupBy(DatasetGroupByBase, DatasetGroupByReductions): __slots__ = () - - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - **kwargs, - ) -> T_Dataset: - """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). 
- - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 8ab2f499e87..4e1579ca109 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,10 +1,7 @@ import warnings -from typing import Hashable, Sequence, Union -from . import duck_array_ops from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase -from .types import T_DataArray, T_Dataset RESAMPLE_DIM = "__resample_dim__" @@ -180,87 +177,6 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): self._dask_groupby_kwargs = dict(method="blockwise") - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - **kwargs, - ) -> T_DataArray: - """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). 
- keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").median() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> da.resample(time="3M").median(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) - def map(self, func, shortcut=False, args=(), **kwargs): """Apply a function to each array in the group and concatenate them together into a new array. @@ -351,95 +267,6 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) self._dask_groupby_kwargs = dict(method="blockwise") - def median( - self, - dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, - keep_attrs: bool = None, - **kwargs, - ) -> T_Dataset: - """ - Reduce this Dataset's data by applying ``median`` along some dimension(s). - - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``median`` on this object's data. - - Returns - ------- - reduced : Dataset - New Dataset with ``median`` applied to its data and the - indicated dimension(s) removed - - Examples - -------- - >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan]), - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... ) - >>> ds = xr.Dataset(dict(da=da)) - >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3M").median() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 - - Use ``skipna`` to control whether NaNs are ignored. - - >>> ds.resample(time="3M").median(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. - """ - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) - def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for resampling and concatenate them together into a new Dataset. From 0661c1b667436ca5fde59cd987a1a2db94d9a75e Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 16:40:20 -0700 Subject: [PATCH 036/138] Refactored generator --- xarray/util/generate_reductions.py | 522 +++++++++++++++++------------ 1 file changed, 304 insertions(+), 218 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 66b51dbe1bb..fcb5a573c96 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -12,11 +12,9 @@ while replacing the doctests. """ - import collections import textwrap -from functools import partial -from typing import Callable, Optional +from dataclasses import dataclass MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -26,19 +24,12 @@ from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .options import OPTIONS from .types import T_DataArray, T_Dataset if sys.version_info >= (3, 8): from typing import Protocol else: - from typing_extensions import Protocol - - -try: - import dask_groupby -except ImportError: - dask_groupby = None''' + from typing_extensions import Protocol''' OBJ_PREAMBLE = """ @@ -60,124 +51,205 @@ def reduce( class {obj}{cls}Reductions: __slots__ = ()""" -_SKIPNA_DOCSTRING = """ -skipna : bool, optional +TEMPLATE_REDUCTION_SIGNATURE = ''' + def {method}( + self: {obj}Reduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} + keep_attrs: bool = None, + **kwargs, + ) -> T_{obj}: + """ + Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). + + Parameters + ----------''' + +TEMPLATE_RETURNS = """ + Returns + ------- + reduced : {obj} + New {obj} with ``{method}`` applied to its data and the + indicated dimension(s) removed""" + +TEMPLATE_SEE_ALSO = """ + See Also + -------- + numpy.{method} + dask.array.{method} + {see_also_obj}.{method} + :ref:`{docref}` + User guide on {docref_description}.""" + +TEMPLATE_NOTES = """ + Notes + ----- + {notes}""" + +_DIM_DOCSTRING = """dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" + +_SKIPNA_DOCSTRING = """skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64).""" -_MINCOUNT_DOCSTRING = """ -min_count : int, default: None +_MINCOUNT_DOCSTRING = """min_count : int, default: None The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array.""" +_DDOF_DOCSTRING = """ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements.""" + +_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes.""" + +_KWARGS_DOCSTRING = """**kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``{method}`` on this object's data. + These could include dask-specific kwargs like ``split_every``.""" -BOOL_REDUCE_METHODS = ["all", "any"] -NAN_REDUCE_METHODS = [ - "max", - "min", - "mean", - "prod", - "sum", - "std", - "var", -] NAN_CUM_METHODS = ["cumsum", "cumprod"] -MIN_COUNT_METHODS = ["prod", "sum"] + NUMERIC_ONLY_METHODS = [ - "mean", - "std", - "var", - "sum", - "prod", - "median", "cumsum", "cumprod", ] +_NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." + +extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") +skipna = extra_kwarg( + docs=_SKIPNA_DOCSTRING, + kwarg="skipna: bool = None,", + call="skipna=skipna,", + example="""\n + Use ``skipna`` to control whether NaNs are ignored. -TEMPLATE_REDUCTION = ''' - def {method}( - self: {obj}Reduce, - dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} - keep_attrs: bool = None, - fill_value=None, - **kwargs, - ) -> T_{obj}: - """ - Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). + >>> {calculation}(skipna=False)""", +) +min_count = extra_kwarg( + docs=_MINCOUNT_DOCSTRING, + kwarg="min_count: Optional[int] = None,", + call="min_count=min_count,", + example="""\n + Specify ``min_count`` for finer control over when NaNs are ignored. - Parameters - ---------- - dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. {extra_dim}{extra_args}{skip_na.docs}{min_count.docs} - keep_attrs : bool, optional - If True, ``attrs`` will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating ``{method}`` on this object's data. + >>> {calculation}(skipna=True, min_count=2)""", +) +ddof = extra_kwarg( + docs=_DDOF_DOCSTRING, + kwarg="ddof: int = 0,", + call="ddof=ddof,", + example="""\n + Specify ``ddof=1`` for an unbiased estimate. 
- Returns - ------- - reduced : {obj} - New {obj} with ``{method}`` applied to its data and the - indicated dimension(s) removed + >>> {calculation}(skipna=True, ddof=1)""", +) - Examples - --------{example} - See Also - -------- - numpy.{method} - {obj}.{method} - :ref:`{docref}` - User guide on {docref} operations. - """ +class Method: + def __init__( + self, + name, + bool_reduce=False, + extra_kwargs=tuple(), + numeric_only=False, + ): + self.name = name + self.extra_kwargs = extra_kwargs + self.numeric_only = numeric_only + + if bool_reduce: + self.array_method = f"array_{name}" + self.np_example_array = """ + ... np.array([True, True, True, True, True, False], dtype=bool),""" - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="{method}", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs,{skip_na.call}{min_count.call} - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + else: + self.array_method = name + self.np_example_array = """ + ... np.array([1, 2, 3, 1, 2, np.nan]),""" + + +class ReductionGenerator: + def __init__( + self, + cls, + datastructure, + methods, + docref, + docref_description, + example_call_preamble, + see_also_obj=None, + ): + self.datastructure = datastructure + self.cls = cls + self.methods = methods + self.docref = docref + self.docref_description = docref_description + self.example_call_preamble = example_call_preamble + self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + if not see_also_obj: + self.see_also_obj = self.datastructure.name + else: + self.see_also_obj = see_also_obj + + def generate_methods(self): + yield [self.preamble] + for method in self.methods: + yield self.generate_method(method) + + def generate_method(self, method): + template_kwargs = dict(obj=self.datastructure.name, method=method.name) + + if method.extra_kwargs: + extra_kwargs = "\n " + "\n ".join( + [kwarg.kwarg for kwarg in method.extra_kwargs if kwarg.kwarg] ) else: - return self.reduce( - duck_array_ops.{array_method}, - dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} - keep_attrs=keep_attrs, - **kwargs, - )''' - - -def generate_groupby_example(obj: str, cls: str, method: str): - """Generate examples for method.""" - dx = "ds" if obj == "Dataset" else "da" - if cls == "Resample": - calculation = f'{dx}.resample(time="3M").{method}' - elif cls == "GroupBy": - calculation = f'{dx}.groupby("labels").{method}' - else: - raise ValueError - - if method in BOOL_REDUCE_METHODS: - np_array = """ - ... np.array([True, True, True, True, True, False], dtype=bool),""" + extra_kwargs = "" - else: - np_array = """ - ... 
np.array([1, 2, 3, 1, 2, np.nan]),""" + yield TEMPLATE_REDUCTION_SIGNATURE.format( + **template_kwargs, + extra_kwargs=extra_kwargs, + ) - create_da = f""" - >>> da = xr.DataArray({np_array} + for text in [ + _DIM_DOCSTRING.format(method=method.name), + *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs), + _KEEP_ATTRS_DOCSTRING, + _KWARGS_DOCSTRING.format(method=method.name), + ]: + if text: + yield textwrap.indent(text, 8 * " ") + + yield TEMPLATE_RETURNS.format(**template_kwargs) + + yield TEMPLATE_SEE_ALSO.format( + **template_kwargs, + docref=self.docref, + docref_description=self.docref_description, + see_also_obj=self.see_also_obj, + ) + + if method.numeric_only: + yield TEMPLATE_NOTES.format(notes=_NUMERIC_ONLY_NOTES) + + yield textwrap.indent(self.generate_example(method=method), "") + + yield ' """' + + yield self.generate_code(method) + + def generate_example(self, method): + create_da = f""" + >>> da = xr.DataArray({method.np_example_array} ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -185,130 +257,144 @@ def generate_groupby_example(obj: str, cls: str, method: str): ... ), ... )""" - if obj == "Dataset": - maybe_dataset = """ - >>> ds = xr.Dataset(dict(da=da)) - >>> ds""" - else: - maybe_dataset = """ - >>> da""" - - if method in NAN_REDUCE_METHODS: - maybe_skipna = f""" - - Use ``skipna`` to control whether NaNs are ignored. - - >>> {calculation}(skipna=False)""" - else: - maybe_skipna = "" - - if method in MIN_COUNT_METHODS: - maybe_mincount = f""" - - Specify ``min_count`` for finer control over when NaNs are ignored. + calculation = f"{self.datastructure.example_var_name}{self.example_call_preamble}.{method.name}" + if method.extra_kwargs: + extra_examples = "".join( + kwarg.example for kwarg in method.extra_kwargs if kwarg.example + ).format(calculation=calculation, method=method.name) + else: + extra_examples = "" - >>> {calculation}(skipna=True, min_count=2)""" - else: - maybe_mincount = "" + return f""" + Examples + --------{create_da}{self.datastructure.docstring_create} - return f"""{create_da}{maybe_dataset} + >>> {calculation}(){extra_examples}""" - >>> {calculation}(){maybe_skipna}{maybe_mincount}""" +class GenericReductionGenerator(ReductionGenerator): + def generate_code(self, method): + extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] -def generate_method( - obj: str, - docref: str, - method: str, - skipna: bool, - example_generator: Callable, - array_method: Optional[str] = None, -): - if not array_method: - array_method = method + if self.datastructure.numeric_only: + extra_kwargs.append(f"numeric_only={method.numeric_only},") - if obj == "Dataset": - if method in NUMERIC_ONLY_METHODS: - numeric_only_call = "\n numeric_only=True," + if extra_kwargs: + extra_kwargs = "\n " + "\n ".join(extra_kwargs) else: - numeric_only_call = "\n numeric_only=False," - else: - numeric_only_call = "" - - kwarg = collections.namedtuple("kwarg", "docs kwarg call") - if skipna: - skip_na = kwarg( - docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), - kwarg="\n skipna: bool = True,", - call="\n skipna=skipna,", - ) - else: - skip_na = kwarg(docs="", kwarg="", call="") - - if method in MIN_COUNT_METHODS: - min_count = kwarg( - docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), - kwarg="\n min_count: Optional[int] = None,", - call="\n min_count=min_count,", - ) - else: - min_count = kwarg(docs="", kwarg="", call="") - - return TEMPLATE_REDUCTION.format( - obj=obj, - docref=docref, - 
method=method, - array_method=array_method, - extra_dim="""If ``None``, will reduce over all dimensions - present in the grouped variable.""", - extra_args="", - skip_na=skip_na, - min_count=min_count, - numeric_only_call=numeric_only_call, - example=example_generator(obj=obj, method=method), - ) - - -def render(obj: str, cls: str, docref: str, example_generator: Callable): - yield CLASS_PREAMBLE.format(obj=obj, cls=cls) - yield generate_method( - obj, - method="count", - docref=docref, - skipna=False, - example_generator=example_generator, - ) - for method in BOOL_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=False, - array_method=f"array_{method}", - example_generator=example_generator, - ) - for method in NAN_REDUCE_METHODS: - yield generate_method( - obj, - method=method, - docref=docref, - skipna=True, - example_generator=example_generator, - ) + extra_kwargs = "" + return f""" return self.reduce( + duck_array_ops.{method.array_method}, + dim=dim,{extra_kwargs} + keep_attrs=keep_attrs, + **kwargs, + )""" + + +REDUCTION_METHODS = ( + Method("count"), + Method("all", bool_reduce=True), + Method("any", bool_reduce=True), + Method("max", extra_kwargs=(skipna,)), + Method("min", extra_kwargs=(skipna,)), + Method("mean", extra_kwargs=(skipna,), numeric_only=True), + Method("prod", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("sum", extra_kwargs=(skipna, min_count), numeric_only=True), + Method("std", extra_kwargs=(skipna, ddof), numeric_only=True), + Method("var", extra_kwargs=(skipna, ddof), numeric_only=True), + Method("median", extra_kwargs=(skipna,), numeric_only=True), +) + + +@dataclass +class DataStructure: + name: str + docstring_create: str + example_var_name: str + numeric_only: bool = False + + +DatasetObject = DataStructure( + name="Dataset", + docstring_create=""" + >>> ds = xr.Dataset(dict(da=da)) + >>> ds""", + example_var_name="ds", + numeric_only=True, +) +DataArrayObject = DataStructure( + name="DataArray", + docstring_create=""" + >>> da""", + example_var_name="da", + numeric_only=False, +) + +DatasetGenerator = GenericReductionGenerator( + cls="", + datastructure=DatasetObject, + methods=REDUCTION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + see_also_obj="DataArray", +) +DataArrayGenerator = GenericReductionGenerator( + cls="", + datastructure=DataArrayObject, + methods=REDUCTION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + see_also_obj="Dataset", +) + +DataArrayGroupByGenerator = GenericReductionGenerator( + cls="GroupBy", + datastructure=DataArrayObject, + methods=REDUCTION_METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DataArrayResampleGenerator = GenericReductionGenerator( + cls="Resample", + datastructure=DataArrayObject, + methods=REDUCTION_METHODS, + docref="resampling", + docref_description="resampling operations", + example_call_preamble='.resample(time="3M")', +) +DatasetGroupByGenerator = GenericReductionGenerator( + cls="GroupBy", + datastructure=DatasetObject, + methods=REDUCTION_METHODS, + docref="groupby", + docref_description="groupby operations", + example_call_preamble='.groupby("labels")', +) +DatasetResampleGenerator = GenericReductionGenerator( + cls="Resample", + datastructure=DatasetObject, + methods=REDUCTION_METHODS, + docref="resampling", + docref_description="resampling 
operations", + example_call_preamble='.resample(time="3M")', +) if __name__ == "__main__": print(MODULE_PREAMBLE) - for obj in ["Dataset", "DataArray"]: - print(OBJ_PREAMBLE.format(obj=obj)) - for cls, docref in ( - ("GroupBy", "groupby"), - ("Resample", "resampling"), - ): - for line in render( - obj=obj, - cls=cls, - docref=docref, - example_generator=partial(generate_groupby_example, cls=cls), - ): + print(OBJ_PREAMBLE.format(obj="Dataset")) + print(OBJ_PREAMBLE.format(obj="DataArray")) + for gen in [ + DatasetGenerator, + DataArrayGenerator, + DatasetGroupByGenerator, + DatasetResampleGenerator, + DataArrayGroupByGenerator, + DataArrayResampleGenerator, + ]: + for lines in gen.generate_methods(): + for line in lines: print(line) From 0c35c0c67dcd0852b08d4408c45e3790fc9b000e Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 16:41:26 -0700 Subject: [PATCH 037/138] Reimplemented --- xarray/core/_reductions.py | 4129 +++++++++++++++++++++------- xarray/core/groupby.py | 3 + xarray/util/generate_reductions.py | 45 +- 3 files changed, 3235 insertions(+), 942 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index b221038da61..bb8a84156e2 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -33,14 +33,26 @@ def reduce( ... -class DatasetGroupByReductions: +class DataArrayReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_DataArray: + ... + + +class DatasetReductions: __slots__ = () def count( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -50,8 +62,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -59,6 +70,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -66,6 +78,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -86,45 +106,24 @@ def count( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.groupby("labels").count() + >>> ds.count() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) int64 1 2 2 - - See Also - -------- - numpy.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. + da int64 5 """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="count", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -134,8 +133,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -143,6 +141,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -150,6 +149,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -170,45 +177,24 @@ def all( Data variables: da (time) bool True True True True True False - >>> ds.groupby("labels").all() + >>> ds.all() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) bool False True True - - See Also - -------- - numpy.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. + da bool False """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="all", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -218,8 +204,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -227,6 +212,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -234,6 +220,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. 
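The new ``**kwargs`` wording in these regenerated docstrings ("These could include dask-specific kwargs like ``split_every``") is easiest to see with chunked data. A hedged sketch, assuming dask is installed and reusing the six-point example Dataset ``ds`` from the doctests:

    # With dask-backed variables the reduction is forwarded to the matching
    # dask.array function, so dask-only tuning knobs such as split_every can
    # be passed straight through **kwargs.
    chunked = ds.chunk({"time": 2})
    result = chunked.sum(split_every=2)  # lazy, dask-backed
    result.compute()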
+ Examples -------- >>> da = xr.DataArray( @@ -254,46 +248,25 @@ def any( Data variables: da (time) bool True True True True True False - >>> ds.groupby("labels").any() + >>> ds.any() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. + da bool True """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="any", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -303,12 +276,11 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -317,6 +289,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -324,6 +297,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -344,58 +325,34 @@ def max( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.groupby("labels").max() + >>> ds.max() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 1.0 2.0 3.0 + da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").max(skipna=False) + >>> ds.max(skipna=False) - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 nan 2.0 3.0 - - See Also - -------- - numpy.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. + da float64 nan """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="max", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -405,12 +362,11 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -419,6 +375,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -426,6 +383,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + Examples -------- >>> da = xr.DataArray( @@ -446,58 +411,34 @@ def min( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.groupby("labels").min() + >>> ds.min() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 1.0 2.0 1.0 + da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").min(skipna=False) + >>> ds.min(skipna=False) - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 nan 2.0 1.0 - - See Also - -------- - numpy.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. + da float64 nan """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="min", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -507,12 +448,11 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -521,6 +461,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -528,6 +469,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -548,59 +501,35 @@ def mean( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.groupby("labels").mean() + >>> ds.mean() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 1.0 2.0 2.0 + da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").mean(skipna=False) + >>> ds.mean(skipna=False) - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. + da float64 nan """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="mean", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -610,12 +539,11 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. 
If @@ -630,6 +558,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -637,6 +566,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -657,50 +598,2021 @@ def prod( Data variables: da (time) float64 1.0 2.0 3.0 1.0 2.0 nan - >>> ds.groupby("labels").prod() + >>> ds.prod() - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 1.0 4.0 3.0 + da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. - >>> ds.groupby("labels").prod(skipna=False) + >>> ds.prod(skipna=False) - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () Data variables: - da (labels) float64 nan 4.0 3.0 + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. - >>> ds.groupby("labels").prod(skipna=True, min_count=2) + >>> ds.prod(skipna=True, min_count=2) - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + Dimensions: () + Data variables: + da float64 12.0 + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. 
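The ``min_count`` rule documented for ``prod`` and ``sum`` can be stated in one line: if fewer than ``min_count`` non-NA values remain after masking, the result is NA. A plain-NumPy sketch of that rule (illustrative only, not the xarray implementation):

import numpy as np


def nansum_with_min_count(values, min_count=None):
    # Mirrors the documented semantics: too few valid values -> NaN result.
    valid = ~np.isnan(values)
    if min_count is not None and valid.sum() < min_count:
        return np.nan
    return values[valid].sum()


data = np.array([1.0, np.nan, np.nan])
print(nansum_with_min_count(data))               # 1.0
print(nansum_with_min_count(data, min_count=1))  # 1.0
print(nansum_with_min_count(data, min_count=2))  # nan: only one valid value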
+ + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayReductions: + __slots__ = () + + def count( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.count() + + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.any() + + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.max(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
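For the ``skipna`` and ``ddof`` options that recur in these docstrings, the behaviour matches the corresponding plain NumPy reductions. A short standalone check with the same six values used in the examples (nothing beyond NumPy is assumed):

import numpy as np

values = np.array([1.0, 2.0, 3.0, 1.0, 2.0, np.nan])

# skipna=True (the default for float data): NaNs are ignored.
print(np.nanmin(values), np.nanmax(values))  # 1.0 3.0

# skipna=False: a single NaN propagates into the result.
print(np.min(values), np.max(values))  # nan nan

# ddof on std/var follows NumPy's convention of dividing by N - ddof.
valid = values[~np.isnan(values)]
print(np.var(valid), np.var(valid, ddof=1))  # 0.56 0.7
print(np.std(valid), np.std(valid, ddof=1))  # 0.7483... 0.8366...

These are the same numbers that appear in the ``std``/``var`` docstring examples.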
+ + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.min(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.mean(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. 
For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.prod(skipna=True, min_count=2) + + array(12.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.sum(skipna=True, min_count=2) + + array(9.) + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.std(skipna=True, ddof=1) + + array(0.83666003) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.var(skipna=True, ddof=1) + + array(0.7) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.median(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DatasetGroupByReductions: + __slots__ = () + + def count( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 2 + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="count", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + numeric_only=False, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="all", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + numeric_only=False, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="any", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + numeric_only=False, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="max", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + numeric_only=False, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="min", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + numeric_only=False, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="mean", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + numeric_only=True, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 3.0 - See Also - -------- - numpy.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -718,10 +2630,9 @@ def prod( def sum( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -731,12 +2642,11 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -751,6 +2661,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -758,6 +2669,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -784,7 +2707,7 @@ def sum( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 4.0 4.0 + da (labels) float64 nan 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. @@ -805,23 +2728,17 @@ def sum( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 4.0 - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -839,9 +2756,9 @@ def sum( def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -851,13 +2768,15 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -865,6 +2784,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -872,6 +2792,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
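# A minimal numpy sketch of the ``ddof`` semantics documented above: the sum of
# squared deviations is divided by ``N - ddof``, so ``ddof=0`` gives the biased
# estimate and ``ddof=1`` the unbiased one. Plain numpy only; nothing here
# depends on the helpers introduced by this patch.
import numpy as np

x = np.array([1.0, 2.0, 4.0])
n = x.size
ss = ((x - x.mean()) ** 2).sum()
assert np.isclose(np.sqrt(ss / n), np.std(x, ddof=0))        # default divisor N
assert np.isclose(np.sqrt(ss / (n - 1)), np.std(x, ddof=1))  # unbiased divisor N - 1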
+ Examples -------- >>> da = xr.DataArray( @@ -893,38 +2825,25 @@ def std( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").std() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 - See Also - -------- - numpy.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").std(skipna=True, ddof=1) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + ddof=ddof, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -933,6 +2852,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -941,9 +2861,9 @@ def std( def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -953,13 +2873,15 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -967,6 +2889,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -974,6 +2897,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -995,44 +2930,127 @@ def var( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").var() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 + + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> ds.groupby("labels").var(skipna=True, ddof=1) + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + ddof=ddof, + numeric_only=True, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DatasetReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed See Also -------- - numpy.var - Dataset.var + numpy.median + dask.array.median + Dataset.median :ref:`groupby` User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.groupby("labels").median(skipna=False) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( - func="var", + func="median", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) else: return self.reduce( - duck_array_ops.var, + duck_array_ops.median, dim=dim, skipna=skipna, numeric_only=True, @@ -1048,7 +3066,6 @@ def count( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1058,8 +3075,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1067,6 +3083,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1074,6 +3091,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1101,21 +3126,15 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) int64 1 3 1 - - See Also - -------- - numpy.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, + numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1132,7 +3151,6 @@ def all( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1142,8 +3160,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1151,6 +3168,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1158,6 +3176,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1185,21 +3211,15 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True False - - See Also - -------- - numpy.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, + numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1216,7 +3236,6 @@ def any( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1226,8 +3245,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1235,6 +3253,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1242,6 +3261,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1269,21 +3296,15 @@ def any( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, + numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1299,9 +3320,8 @@ def any( def max( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1311,12 +3331,11 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1325,6 +3344,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1332,6 +3352,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1358,7 +3386,7 @@ def max( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 3.0 2.0 + da (time) float64 1.0 3.0 nan Use ``skipna`` to control whether NaNs are ignored. @@ -1369,22 +3397,16 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 3.0 nan - - See Also - -------- - numpy.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + numeric_only=False, # TODO: Add dask resampling reduction tests! 
**self._dask_groupby_kwargs, ) @@ -1401,9 +3423,8 @@ def max( def min( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1413,12 +3434,11 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1427,6 +3447,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1434,6 +3455,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1460,7 +3489,7 @@ def min( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 2.0 + da (time) float64 1.0 1.0 nan Use ``skipna`` to control whether NaNs are ignored. @@ -1471,22 +3500,16 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 1.0 nan - - See Also - -------- - numpy.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1503,9 +3526,8 @@ def min( def mean( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1515,12 +3537,11 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1529,6 +3550,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1536,6 +3558,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1562,7 +3596,7 @@ def mean( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 2.0 2.0 + da (time) float64 1.0 2.0 nan Use ``skipna`` to control whether NaNs are ignored. @@ -1573,22 +3607,16 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1605,10 +3633,9 @@ def mean( def prod( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1618,12 +3645,11 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1638,6 +3664,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1645,6 +3672,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1671,7 +3710,7 @@ def prod( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 6.0 2.0 + da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. 
@@ -1692,23 +3731,17 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1726,10 +3759,9 @@ def prod( def sum( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1739,12 +3771,11 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1759,6 +3790,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1766,6 +3798,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1792,7 +3836,7 @@ def sum( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 6.0 2.0 + da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. @@ -1813,23 +3857,17 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1847,9 +3885,9 @@ def sum( def std( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1859,13 +3897,15 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1873,6 +3913,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1880,6 +3921,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1901,38 +3954,25 @@ def std( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").std() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 nan - See Also - -------- - numpy.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").std(skipna=True, ddof=1) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + ddof=ddof, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -1941,6 +3981,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, @@ -1949,9 +3990,9 @@ def std( def var( self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_Dataset: """ @@ -1961,13 +4002,15 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1975,6 +4018,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1982,6 +4026,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2003,38 +4059,25 @@ def var( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").var() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 nan - See Also - -------- - numpy.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").var(skipna=True, ddof=1) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="var", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + ddof=ddof, + numeric_only=True, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2043,23 +4086,106 @@ def var( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - -class DataArrayReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], + def median( + self: DatasetReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, + skipna: bool = None, keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_DataArray: - ... + **kwargs, + ) -> T_Dataset: + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.resample(time="3M").median(skipna=False) + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="median", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + numeric_only=True, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) class DataArrayGroupByReductions: @@ -2069,7 +4195,6 @@ def count( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2079,8 +4204,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2088,6 +4212,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2095,6 +4220,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2117,20 +4250,13 @@ def count( array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="count", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! 
**self._dask_groupby_kwargs, @@ -2147,7 +4273,6 @@ def all( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2157,8 +4282,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2166,6 +4290,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2173,6 +4298,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2195,20 +4328,13 @@ def all( array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="all", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, @@ -2225,7 +4351,6 @@ def any( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2235,8 +4360,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2244,6 +4368,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2251,6 +4376,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2273,20 +4406,13 @@ def any( array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="any", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! 
**self._dask_groupby_kwargs, @@ -2302,9 +4428,8 @@ def any( def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2314,12 +4439,11 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2328,6 +4452,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2335,6 +4460,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2354,7 +4487,7 @@ def max( >>> da.groupby("labels").max() - array([1., 2., 3.]) + array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2365,20 +4498,13 @@ def max( array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="max", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, # TODO: Add dask resampling reduction tests! @@ -2396,9 +4522,8 @@ def max( def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2408,12 +4533,11 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2422,6 +4546,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
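# A short, self-contained usage sketch of the grouped ``min`` documented here,
# built from the same example data these docstrings construct; with
# ``skipna=False`` the NaN in group "a" propagates into the result
# (cf. the ``array([nan, 2., 1.])`` doctest output below).
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.array([1, 2, 3, 1, 2, np.nan]),
    dims="time",
    coords=dict(
        time=("time", pd.date_range("01-01-2001", freq="M", periods=6)),
        labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
    ),
)
da.groupby("labels").min(skipna=False)  # labels a, b, c -> nan, 2.0, 1.0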
Returns ------- @@ -2429,6 +4554,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2448,7 +4581,7 @@ def min( >>> da.groupby("labels").min() - array([1., 2., 1.]) + array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2459,20 +4592,13 @@ def min( array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="min", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, # TODO: Add dask resampling reduction tests! @@ -2490,9 +4616,8 @@ def min( def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2502,12 +4627,11 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2516,6 +4640,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2523,6 +4648,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2542,7 +4679,7 @@ def mean( >>> da.groupby("labels").mean() - array([1., 2., 2.]) + array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2553,20 +4690,13 @@ def mean( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="mean", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, # TODO: Add dask resampling reduction tests! 
@@ -2584,10 +4714,9 @@ def mean( def prod( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2597,12 +4726,11 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2617,6 +4745,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2624,6 +4753,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2643,7 +4784,7 @@ def prod( >>> da.groupby("labels").prod() - array([1., 4., 3.]) + array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2662,20 +4803,13 @@ def prod( array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="prod", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, @@ -2695,10 +4829,9 @@ def prod( def sum( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2708,12 +4841,11 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2728,6 +4860,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2735,6 +4868,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2754,7 +4899,7 @@ def sum( >>> da.groupby("labels").sum() - array([1., 4., 4.]) + array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2773,20 +4918,13 @@ def sum( array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="sum", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, @@ -2806,9 +4944,9 @@ def sum( def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2818,13 +4956,15 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2832,6 +4972,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2839,6 +4980,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2857,34 +5010,24 @@ def std( labels (time) >> da.groupby("labels").std() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - See Also - -------- - numpy.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> da.groupby("labels").std(skipna=True, ddof=1) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="std", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, + ddof=ddof, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2893,6 +5036,7 @@ def std( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) @@ -2900,9 +5044,9 @@ def std( def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -2912,13 +5056,15 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2926,6 +5072,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2933,6 +5080,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2951,32 +5110,113 @@ def var( labels (time) >> da.groupby("labels").var() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").var(skipna=True, ddof=1) + """ + + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="var", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + skipna=skipna, + ddof=ddof, + # TODO: Add dask resampling reduction tests! + **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed See Also -------- - numpy.var - DataArray.var + numpy.median + dask.array.median + DataArray.median :ref:`groupby` User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.groupby("labels").median(skipna=False) """ if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( - func="var", + func="median", dim=dim, - fill_value=fill_value, + # fill_value=fill_value, keep_attrs=keep_attrs, skipna=skipna, # TODO: Add dask resampling reduction tests! @@ -2984,7 +5224,7 @@ def var( ) else: return self.reduce( - duck_array_ops.var, + duck_array_ops.median, dim=dim, skipna=skipna, keep_attrs=keep_attrs, @@ -2999,7 +5239,6 @@ def count( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3009,8 +5248,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3018,6 +5256,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3025,6 +5264,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -3047,37 +5294,18 @@ def count( array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="count", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3087,8 +5315,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3096,6 +5323,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3103,6 +5331,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3125,37 +5361,18 @@ def all( array([ True, True, False]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="all", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3165,8 +5382,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3174,6 +5390,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -3181,6 +5398,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3203,38 +5428,19 @@ def any( array([ True, True, True]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="any", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3244,12 +5450,11 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3258,6 +5463,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3265,6 +5471,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3295,40 +5509,20 @@ def max( array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="max", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3338,12 +5532,11 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3352,6 +5545,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3359,6 +5553,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3389,40 +5591,20 @@ def min( array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="min", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3432,12 +5614,11 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3446,6 +5627,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3453,6 +5635,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3483,41 +5677,21 @@ def mean( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="mean", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3527,12 +5701,11 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3547,6 +5720,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3554,6 +5728,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3592,43 +5778,22 @@ def prod( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. 
""" - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="prod", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - min_count=min_count, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3638,12 +5803,11 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3658,6 +5822,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3665,6 +5830,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3703,42 +5880,22 @@ def sum( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="sum", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - min_count=min_count, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3748,13 +5905,15 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. 
- skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3762,6 +5921,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3769,6 +5929,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3800,39 +5972,29 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - See Also - -------- - numpy.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. - """ + Specify ``ddof=1`` for an unbiased estimate. - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="std", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DataArrayReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, - fill_value=None, **kwargs, ) -> T_DataArray: """ @@ -3842,13 +6004,15 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -3856,6 +6020,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3863,6 +6028,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3894,29 +6071,105 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self: DataArrayReduce, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + See Also -------- - numpy.var - DataArray.var + numpy.median + dask.array.median + DataArray.median :ref:`resampling` User guide on resampling operations. - """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="var", - dim=dim, - fill_value=fill_value, - keep_attrs=keep_attrs, - skipna=skipna, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 24656104138..b004336f34a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -530,6 +530,9 @@ def _maybe_unstack(self, obj): def _dask_groupby_reduce(self, dim, **kwargs): from dask_groupby.xarray import xarray_reduce + # TODO: fix this + kwargs.pop("numeric_only", None) + # weird backcompat # reducing along a unique indexed dimension with squeeze=True # should raise an error diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index fcb5a573c96..d1f0fb31813 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -24,12 +24,19 @@ from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops +from .options import OPTIONS from .types import T_DataArray, T_Dataset if sys.version_info >= (3, 8): from typing import Protocol else: - from typing_extensions import Protocol''' + from typing_extensions import Protocol + + +try: + import dask_groupby +except ImportError: + dask_groupby = None''' OBJ_PREAMBLE = """ @@ -291,6 +298,36 @@ def generate_code(self, method): )""" +class GroupByReductionGenerator(ReductionGenerator): + def generate_code(self, method): + extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] + + if self.datastructure.numeric_only: + extra_kwargs.append(f"numeric_only={method.numeric_only},") + + if extra_kwargs: + extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 16 * " ") + else: + extra_kwargs = "" + return f""" + if dask_groupby and OPTIONS["use_numpy_groupies"]: + return self._dask_groupby_reduce( + func="{method.name}", + dim=dim,{extra_kwargs} + # fill_value=fill_value, + keep_attrs=keep_attrs, + # TODO: Add dask resampling reduction tests! 
+ **self._dask_groupby_kwargs, + ) + else: + return self.reduce( + duck_array_ops.{method.array_method}, + dim=dim,{extra_kwargs} + keep_attrs=keep_attrs, + **kwargs, + )""" + + REDUCTION_METHODS = ( Method("count"), Method("all", bool_reduce=True), @@ -349,7 +386,7 @@ class DataStructure: see_also_obj="Dataset", ) -DataArrayGroupByGenerator = GenericReductionGenerator( +DataArrayGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", datastructure=DataArrayObject, methods=REDUCTION_METHODS, @@ -365,7 +402,7 @@ class DataStructure: docref_description="resampling operations", example_call_preamble='.resample(time="3M")', ) -DatasetGroupByGenerator = GenericReductionGenerator( +DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", datastructure=DatasetObject, methods=REDUCTION_METHODS, @@ -373,7 +410,7 @@ class DataStructure: docref_description="groupby operations", example_call_preamble='.groupby("labels")', ) -DatasetResampleGenerator = GenericReductionGenerator( +DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", datastructure=DatasetObject, methods=REDUCTION_METHODS, From 3e08964dca9371ff93139f67bbc6a0170c48c28d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 16:54:52 -0700 Subject: [PATCH 038/138] Add benchmarks --- asv_bench/benchmarks/groupby.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 46d6293cc98..3cea3015f76 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -21,15 +21,17 @@ def setup(self, *args, **kwargs): def time_init(self, ndim): getattr(self, f"ds{ndim}d").groupby("b") - @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) - def time_agg_small_num_groups(self, method, ndim): + @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) + def time_agg_small_num_groups(self, method, ndim, npg): ds = getattr(self, f"ds{ndim}d") - getattr(ds.groupby("a"), method)() + with xr.set_options(use_numpy_groupies=npg): + getattr(ds.groupby("a"), method)() - @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) - def time_agg_large_num_groups(self, method, ndim): + @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) + def time_agg_large_num_groups(self, method, ndim, npg): ds = getattr(self, f"ds{ndim}d") - getattr(ds.groupby("b"), method)() + with xr.set_options(use_numpy_groupies=npg): + getattr(ds.groupby("b"), method)() class GroupByDask(GroupBy): @@ -79,15 +81,17 @@ def setup(self, *args, **kwargs): def time_init(self, ndim): getattr(self, f"ds{ndim}d").resample(time="D") - @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) - def time_agg_small_num_groups(self, method, ndim): + @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) + def time_agg_small_num_groups(self, method, ndim, npg): ds = getattr(self, f"ds{ndim}d") - getattr(ds.resample(time="3M"), method)() + with xr.set_options(use_numpy_groupies=npg): + getattr(ds.resample(time="3M"), method)() - @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) - def time_agg_large_num_groups(self, method, ndim): + @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) + def time_agg_large_num_groups(self, method, ndim, npg): ds = getattr(self, f"ds{ndim}d") - getattr(ds.resample(time="48H"), method)() + with xr.set_options(use_numpy_groupies=npg): + 
getattr(ds.resample(time="48H"), method)() class ResampleDask(Resample): From 583187a4118613f98d8a7082790724070a3df3ea Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 21:08:53 -0700 Subject: [PATCH 039/138] Fix benchmark to not groupby chunked variables. --- asv_bench/benchmarks/groupby.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 3cea3015f76..4b08014ac37 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -13,6 +13,7 @@ def setup(self, *args, **kwargs): { "a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]), "b": xr.DataArray(np.arange(2 * self.n)), + "c": xr.DataArray(np.arange(2 * self.n)), } ) self.ds2d = self.ds1d.expand_dims(z=10) @@ -38,10 +39,10 @@ class GroupByDask(GroupBy): def setup(self, *args, **kwargs): requires_dask() super().setup(**kwargs) - self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)).chunk({"dim_0": 50}) - self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk( - {"dim_0": 50, "z": 5} - ) + self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)) + self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50}) + self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)) + self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5}) class GroupByPandasDataFrame(GroupBy): From 4ef53dbc9f84dc0a1a19f3b8cd17a3b4fd044296 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 21:57:21 -0700 Subject: [PATCH 040/138] Start supporting ndim groups --- xarray/core/groupby.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b004336f34a..9eb8698d134 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -322,17 +322,18 @@ def __init__( if getattr(group, "name", None) is None: group.name = "group" - group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) - (group_dim,) = group.dims + group_dim = group.dims - expected_size = obj.sizes[group_dim] - if group.size != expected_size: + expected_shape = tuple(obj.sizes[dim] for dim in group_dim) + if group.shape != expected_shape: raise ValueError( - "the group variable's length does not " - "match the length of this variable along its " - "dimension" + f"the group variable's shape {group.shape} does not " + "match the shape of this variable along " + f"dimensions {group_dim!r}: {expected_shape}." 
) + group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) + full_index = None if bins is not None: @@ -344,6 +345,8 @@ def __init__( full_index = binned.categories if grouper is not None: + if group.ndim > 1: + raise NotImplementedError index = safe_cast_to_index(group) if not index.is_monotonic: # TODO: sort instead of raising an error @@ -410,7 +413,7 @@ def __init__( def dims(self): if self._dims is None: self._dims = self._obj.isel( - **{self._group_dim: self._group_indices[0]} + **{self._group_dim[0]: self._group_indices[0]} ).dims return self._dims @@ -462,7 +465,7 @@ def _get_index_and_items(self, index, grouper): def _iter_grouped(self): """Iterate over each element in this group""" for indices in self._group_indices: - yield self._obj.isel(**{self._group_dim: indices}) + yield self._obj.isel(**{self._group_dim[0]: indices}) def _infer_concat_args(self, applied_example): if self._group_dim in applied_example.dims: From 6afb3bf82fbbc3a46a70d48eb74c0c9224d05bce Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 21:57:55 -0700 Subject: [PATCH 041/138] WIP refactor init --- xarray/core/groupby.py | 48 +++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9eb8698d134..34ed3f1163e 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -180,23 +180,6 @@ def __getitem__(self, key): return self.values[key] -def _ensure_1d(group, obj): - if group.ndim != 1: - # try to stack the dims of the group into a single dim - orig_dims = group.dims - stacked_dim = "stacked_" + "_".join(orig_dims) - # these dimensions get created by the stack operation - inserted_dims = [dim for dim in group.dims if dim not in group.coords] - # the copy is necessary here, otherwise read only array raises error - # in pandas: https://github.com/pydata/pandas/issues/12813 - group = group.stack(**{stacked_dim: orig_dims}).copy() - obj = obj.stack(**{stacked_dim: orig_dims}) - else: - stacked_dim = None - inserted_dims = [] - return group, obj, stacked_dim, inserted_dims - - def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True @@ -257,10 +240,13 @@ class GroupBy: "_obj", "_restore_coord_dims", "_stacked_dim", + "_stacked_obj", + "_stacked_group", "_unique_coord", "_dims", "_dask_groupby_kwargs", "_squeeze", + "_bins", ) def __init__( @@ -395,22 +381,44 @@ def __init__( self._obj = obj self._group = group self._group_dim = group_dim + self._bins = bins self._group_indices = group_indices self._unique_coord = unique_coord - self._stacked_dim = stacked_dim - self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims self._dask_groupby_kwargs = {} self._squeeze = squeeze + + self._stacked_obj = None + self._stacked_dim = None + self._stacked_dim = None + self._inserted_dims = None # self._by = by # cached attributes self._groups = None self._dims = None + def _ensure_1d(self): + if self._group.ndim != 1 and self._stacked_obj is None: + # try to stack the dims of the group into a single dim + orig_dims = group.dims + self._stacked_dim = "stacked_" + "_".join(orig_dims) + # these dimensions get created by the stack operation + self._inserted_dims = [dim for dim in group.dims if dim not in group.coords] + # the copy is necessary here, otherwise read only array raises error + # in pandas: https://github.com/pydata/pandas/issues/12813 + self._stacked_group = group.stack(**{stacked_dim: orig_dims}).copy() + 
self._stacked_obj = obj.stack(**{stacked_dim: orig_dims}) + else: + self._stacked_group = self._group + self._stacked_obj = self._obj + self._stacked_dim = None + self._inserted_dims = [] + @property def dims(self): + self._ensure_1d() if self._dims is None: self._dims = self._obj.isel( **{self._group_dim[0]: self._group_indices[0]} @@ -424,6 +432,7 @@ def groups(self): Mapping from group labels to indices. The indices can be used to index the underlying object. """ # provided to mimic pandas.groupby + self._ensure_1d() if self._groups is None: self._groups = dict(zip(self._unique_coord.values, self._group_indices)) return self._groups @@ -464,6 +473,7 @@ def _get_index_and_items(self, index, grouper): def _iter_grouped(self): """Iterate over each element in this group""" + self._ensure_1d() for indices in self._group_indices: yield self._obj.isel(**{self._group_dim[0]: indices}) From 35af40ad26ca9e2193313d7794789ec96cd7f0ba Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 8 Nov 2021 21:58:02 -0700 Subject: [PATCH 042/138] Revert "WIP refactor init" This reverts commit 6afb3bf82fbbc3a46a70d48eb74c0c9224d05bce. --- xarray/core/groupby.py | 48 +++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 34ed3f1163e..9eb8698d134 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -180,6 +180,23 @@ def __getitem__(self, key): return self.values[key] +def _ensure_1d(group, obj): + if group.ndim != 1: + # try to stack the dims of the group into a single dim + orig_dims = group.dims + stacked_dim = "stacked_" + "_".join(orig_dims) + # these dimensions get created by the stack operation + inserted_dims = [dim for dim in group.dims if dim not in group.coords] + # the copy is necessary here, otherwise read only array raises error + # in pandas: https://github.com/pydata/pandas/issues/12813 + group = group.stack(**{stacked_dim: orig_dims}).copy() + obj = obj.stack(**{stacked_dim: orig_dims}) + else: + stacked_dim = None + inserted_dims = [] + return group, obj, stacked_dim, inserted_dims + + def _unique_and_monotonic(group): if isinstance(group, _DummyGroup): return True @@ -240,13 +257,10 @@ class GroupBy: "_obj", "_restore_coord_dims", "_stacked_dim", - "_stacked_obj", - "_stacked_group", "_unique_coord", "_dims", "_dask_groupby_kwargs", "_squeeze", - "_bins", ) def __init__( @@ -381,44 +395,22 @@ def __init__( self._obj = obj self._group = group self._group_dim = group_dim - self._bins = bins self._group_indices = group_indices self._unique_coord = unique_coord + self._stacked_dim = stacked_dim + self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims self._dask_groupby_kwargs = {} self._squeeze = squeeze - - self._stacked_obj = None - self._stacked_dim = None - self._stacked_dim = None - self._inserted_dims = None # self._by = by # cached attributes self._groups = None self._dims = None - def _ensure_1d(self): - if self._group.ndim != 1 and self._stacked_obj is None: - # try to stack the dims of the group into a single dim - orig_dims = group.dims - self._stacked_dim = "stacked_" + "_".join(orig_dims) - # these dimensions get created by the stack operation - self._inserted_dims = [dim for dim in group.dims if dim not in group.coords] - # the copy is necessary here, otherwise read only array raises error - # in pandas: https://github.com/pydata/pandas/issues/12813 - self._stacked_group = group.stack(**{stacked_dim: 
orig_dims}).copy() - self._stacked_obj = obj.stack(**{stacked_dim: orig_dims}) - else: - self._stacked_group = self._group - self._stacked_obj = self._obj - self._stacked_dim = None - self._inserted_dims = [] - @property def dims(self): - self._ensure_1d() if self._dims is None: self._dims = self._obj.isel( **{self._group_dim[0]: self._group_indices[0]} @@ -432,7 +424,6 @@ def groups(self): Mapping from group labels to indices. The indices can be used to index the underlying object. """ # provided to mimic pandas.groupby - self._ensure_1d() if self._groups is None: self._groups = dict(zip(self._unique_coord.values, self._group_indices)) return self._groups @@ -473,7 +464,6 @@ def _get_index_and_items(self, index, grouper): def _iter_grouped(self): """Iterate over each element in this group""" - self._ensure_1d() for indices in self._group_indices: yield self._obj.isel(**{self._group_dim[0]: indices}) From c9a82b3fe9d2dcc0ddee6015595be01a01d5ad00 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 10:09:29 -0700 Subject: [PATCH 043/138] Revert "Start supporting ndim groups" This reverts commit 4ef53dbc9f84dc0a1a19f3b8cd17a3b4fd044296. --- xarray/core/groupby.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 9eb8698d134..b004336f34a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -322,18 +322,17 @@ def __init__( if getattr(group, "name", None) is None: group.name = "group" - group_dim = group.dims + group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) + (group_dim,) = group.dims - expected_shape = tuple(obj.sizes[dim] for dim in group_dim) - if group.shape != expected_shape: + expected_size = obj.sizes[group_dim] + if group.size != expected_size: raise ValueError( - f"the group variable's shape {group.shape} does not " - "match the shape of this variable along " - f"dimensions {group_dim!r}: {expected_shape}." 
+ "the group variable's length does not " + "match the length of this variable along its " + "dimension" ) - group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) - full_index = None if bins is not None: @@ -345,8 +344,6 @@ def __init__( full_index = binned.categories if grouper is not None: - if group.ndim > 1: - raise NotImplementedError index = safe_cast_to_index(group) if not index.is_monotonic: # TODO: sort instead of raising an error @@ -413,7 +410,7 @@ def __init__( def dims(self): if self._dims is None: self._dims = self._obj.isel( - **{self._group_dim[0]: self._group_indices[0]} + **{self._group_dim: self._group_indices[0]} ).dims return self._dims @@ -465,7 +462,7 @@ def _get_index_and_items(self, index, grouper): def _iter_grouped(self): """Iterate over each element in this group""" for indices in self._group_indices: - yield self._obj.isel(**{self._group_dim[0]: indices}) + yield self._obj.isel(**{self._group_dim: indices}) def _infer_concat_args(self, applied_example): if self._group_dim in applied_example.dims: From 0ac5498c87492d595a6860d1bb2d1f4183b329e3 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 10:12:40 -0700 Subject: [PATCH 044/138] Avoid stacking by default --- xarray/core/groupby.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b004336f34a..16590132204 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -261,6 +261,10 @@ class GroupBy: "_dims", "_dask_groupby_kwargs", "_squeeze", + # Save unstacked object for dask_groupby + "_original_obj", + "_unstacked_group", + "_bins", ) def __init__( @@ -322,6 +326,9 @@ def __init__( if getattr(group, "name", None) is None: group.name = "group" + self._original_obj = obj + self._unstacked_group = group + group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) (group_dim,) = group.dims @@ -342,6 +349,7 @@ def __init__( new_dim_name = group.name + "_bins" group = DataArray(binned, group.coords, name=new_dim_name) full_index = binned.categories + self._unstacked_group = group if grouper is not None: index = safe_cast_to_index(group) @@ -539,6 +547,7 @@ def _dask_groupby_reduce(self, dim, **kwargs): if ( dim is None or dim == self._group.name ) and self._group.name in self._obj.xindexes: + # TODO: switch to xindexes after we can use is_unique index = self._obj.indexes[self._group.name] if index.is_unique and self._squeeze: raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") @@ -561,20 +570,14 @@ def _dask_groupby_reduce(self, dim, **kwargs): name=self._unique_coord.name, ) else: - if isinstance(self._group, _DummyGroup): - group = self._group.name + if isinstance(self._unstacked_group, _DummyGroup): + group = self._unstacked_group.name else: - group = self._group - - # TODO: avoid stacking by default - if self._stacked_dim is not None: - obj = self._obj.unstack(self._stacked_dim) - group = group.unstack(self._stacked_dim) - else: - obj = self._obj + group = self._unstacked_group + # TODO: Properly deal with bins here. 
result = xarray_reduce( - obj, + self._original_obj, group, dim=dim, expected_groups=(self._unique_coord.values,), From b9bc1dd0dd6e73793315457e9c9e23cf52428b4c Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 17:01:27 -0700 Subject: [PATCH 045/138] Update reductions --- xarray/core/_reductions.py | 100 ++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index bb8a84156e2..a9d0f271454 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2003,9 +2003,9 @@ def count( return self._dask_groupby_reduce( func="count", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2088,9 +2088,9 @@ def all( return self._dask_groupby_reduce( func="all", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2173,9 +2173,9 @@ def any( return self._dask_groupby_reduce( func="any", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2274,10 +2274,10 @@ def max( return self._dask_groupby_reduce( func="max", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2377,10 +2377,10 @@ def min( return self._dask_groupby_reduce( func="min", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2484,10 +2484,10 @@ def mean( return self._dask_groupby_reduce( func="mean", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2608,11 +2608,11 @@ def prod( return self._dask_groupby_reduce( func="prod", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2734,11 +2734,11 @@ def sum( return self._dask_groupby_reduce( func="sum", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2839,11 +2839,11 @@ def std( return self._dask_groupby_reduce( func="std", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -2944,11 +2944,11 @@ def var( return self._dask_groupby_reduce( func="var", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! 
**self._dask_groupby_kwargs, ) @@ -3041,10 +3041,10 @@ def median( return self._dask_groupby_reduce( func="median", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3132,9 +3132,9 @@ def count( return self._dask_groupby_reduce( func="count", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3217,9 +3217,9 @@ def all( return self._dask_groupby_reduce( func="all", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3302,9 +3302,9 @@ def any( return self._dask_groupby_reduce( func="any", dim=dim, + numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - numeric_only=False, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3403,10 +3403,10 @@ def max( return self._dask_groupby_reduce( func="max", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3506,10 +3506,10 @@ def min( return self._dask_groupby_reduce( func="min", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3613,10 +3613,10 @@ def mean( return self._dask_groupby_reduce( func="mean", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3737,11 +3737,11 @@ def prod( return self._dask_groupby_reduce( func="prod", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3863,11 +3863,11 @@ def sum( return self._dask_groupby_reduce( func="sum", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -3968,11 +3968,11 @@ def std( return self._dask_groupby_reduce( func="std", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4073,11 +4073,11 @@ def var( return self._dask_groupby_reduce( func="var", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! 
**self._dask_groupby_kwargs, ) @@ -4170,10 +4170,10 @@ def median( return self._dask_groupby_reduce( func="median", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4504,9 +4504,9 @@ def max( return self._dask_groupby_reduce( func="max", dim=dim, + skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - skipna=skipna, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4598,9 +4598,9 @@ def min( return self._dask_groupby_reduce( func="min", dim=dim, + skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - skipna=skipna, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4696,9 +4696,9 @@ def mean( return self._dask_groupby_reduce( func="mean", dim=dim, + skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - skipna=skipna, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4809,10 +4809,10 @@ def prod( return self._dask_groupby_reduce( func="prod", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -4924,10 +4924,10 @@ def sum( return self._dask_groupby_reduce( func="sum", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, min_count=min_count, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -5024,10 +5024,10 @@ def std( return self._dask_groupby_reduce( func="std", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -5124,10 +5124,10 @@ def var( return self._dask_groupby_reduce( func="var", dim=dim, - # fill_value=fill_value, - keep_attrs=keep_attrs, skipna=skipna, ddof=ddof, + # fill_value=fill_value, + keep_attrs=keep_attrs, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) @@ -5216,9 +5216,9 @@ def median( return self._dask_groupby_reduce( func="median", dim=dim, + skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - skipna=skipna, # TODO: Add dask resampling reduction tests! **self._dask_groupby_kwargs, ) From bece14e22794e5fadb9e3b3b34af920fcc2be77f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 17:41:13 -0700 Subject: [PATCH 046/138] Fix median and add test. --- xarray/core/_reductions.py | 116 ++++++++++++++--------------- xarray/tests/test_groupby.py | 21 ++++-- xarray/util/generate_reductions.py | 20 ++++- 3 files changed, 88 insertions(+), 69 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index a9d0f271454..67f821596e6 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -3031,32 +3031,31 @@ def median( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - skipna=skipna, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) class DatasetResampleReductions: @@ -4160,32 +4159,31 @@ def median( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - skipna=skipna, - numeric_only=True, - # fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) class DataArrayGroupByReductions: @@ -5206,30 +5204,26 @@ def median( labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ - - if dask_groupby and OPTIONS["use_numpy_groupies"]: - return self._dask_groupby_reduce( - func="median", - dim=dim, - skipna=skipna, - # fill_value=fill_value, - keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, - ) - else: - return self.reduce( - duck_array_ops.median, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) class DataArrayResampleReductions: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 4d2d1e8c98b..b3efc1218e5 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -926,19 +926,21 @@ def test_groupby_sum(self): assert_allclose(expected_sum_axis1, grouped.reduce(np.sum, "y")) assert_allclose(expected_sum_axis1, grouped.sum("y")) - def test_groupby_sum_default(self): + @pytest.mark.parametrize("method", ["sum", "mean", "median"]) + def test_groupby_reductions(self, method): array = self.da grouped = array.groupby("abc") - expected_sum_all = Dataset( + reduction = getattr(np, method) + expected = Dataset( { "foo": Variable( ["x", "abc"], np.array( [ - self.x[:, :9].sum(axis=-1), - self.x[:, 10:].sum(axis=-1), - self.x[:, 9:10].sum(axis=-1), + reduction(self.x[:, :9], axis=-1), + reduction(self.x[:, 10:], axis=-1), + reduction(self.x[:, 9:10], axis=-1), ] ).T, ), @@ -946,7 +948,14 @@ def test_groupby_sum_default(self): } )["foo"] - assert_allclose(expected_sum_all, grouped.sum(dim="y")) + with xr.set_options(use_numpy_groupies=False): + actual_legacy = getattr(grouped, method)(dim="y") + + with xr.set_options(use_numpy_groupies=False): + actual_npg = getattr(grouped, method)(dim="y") + + assert_allclose(expected, actual_legacy) + assert_allclose(expected, actual_npg) def test_groupby_count(self): array = DataArray( diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index d1f0fb31813..8e30620a181 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -305,11 +305,27 @@ def generate_code(self, method): if self.datastructure.numeric_only: extra_kwargs.append(f"numeric_only={method.numeric_only},") + # numpy_groupies & dask_groupby do not support median + if method.name == "median": + indent = 12 + else: + indent = 16 + if extra_kwargs: - extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 16 * " ") + extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), indent * " ") else: extra_kwargs = "" - return f""" + + if method.name == "median": + return f""" return self.reduce( + duck_array_ops.{method.array_method}, + dim=dim,{extra_kwargs} + keep_attrs=keep_attrs, + **kwargs, + )""" + + else: + return f""" if dask_groupby and OPTIONS["use_numpy_groupies"]: return self._dask_groupby_reduce( func="{method.name}", From 41f0aa5aaf89b83bee6eb8f9f87f497cb4efd31e Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 19:27:57 -0700 Subject: [PATCH 047/138] fix test --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b3efc1218e5..2d9ce04d16d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -951,7 +951,7 @@ def test_groupby_reductions(self, method): with xr.set_options(use_numpy_groupies=False): actual_legacy = getattr(grouped, method)(dim="y") - with xr.set_options(use_numpy_groupies=False): + with xr.set_options(use_numpy_groupies=True): actual_npg = getattr(grouped, method)(dim="y") assert_allclose(expected, actual_legacy) From 0559ee151420c31ac713829b74c3148fdfd46fa5 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 9 Nov 2021 21:33:56 -0700 
Subject: [PATCH 048/138] Fix var, std doctests --- xarray/core/_reductions.py | 96 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 67f821596e6..85bd096cc18 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2825,14 +2825,32 @@ def std( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: @@ -2930,14 +2948,32 @@ def var( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: @@ -3953,14 +3989,32 @@ def std( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").std() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: @@ -4058,14 +4112,32 @@ def var( da (time) float64 1.0 2.0 3.0 1.0 2.0 nan >>> ds.resample(time="3M").var() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. 
>>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: @@ -5008,14 +5080,26 @@ def std( labels (time) >> da.groupby("labels").std() + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: @@ -5108,14 +5192,26 @@ def var( labels (time) >> da.groupby("labels").var() + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: From 31e1fd2419f54205616d59eb1ee176e7a61e07b2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 09:06:36 -0700 Subject: [PATCH 049/138] Force test failure to check CI env --- xarray/core/_reductions.py | 1024 +--------------------------- xarray/util/generate_reductions.py | 1 + 2 files changed, 31 insertions(+), 994 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 85bd096cc18..4df94ff42d3 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -98,19 +98,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.count() - - Dimensions: () - Data variables: - da int64 5 """ return self.reduce( duck_array_ops.count, @@ -169,19 +158,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.all() - - Dimensions: () - Data variables: - da bool False """ return self.reduce( duck_array_ops.array_all, @@ -240,19 +218,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.any() - - Dimensions: () - Data variables: - da bool True """ return self.reduce( duck_array_ops.array_any, @@ -317,27 +284,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.max() - - Dimensions: () - Data variables: - da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.max, @@ -403,27 +355,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.min() - - Dimensions: () - Data variables: - da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.min, @@ -493,27 +430,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.mean() - - Dimensions: () - Data variables: - da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -590,35 +512,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.prod() - - Dimensions: () - Data variables: - da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -696,35 +599,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.sum() - - Dimensions: () - Data variables: - da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -799,35 +683,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.std() - - Dimensions: () - Data variables: - da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -902,35 +767,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.var() - - Dimensions: () - Data variables: - da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -1001,27 +847,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.median() - - Dimensions: () - Data variables: - da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.median(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.median, @@ -1084,15 +915,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - - array(5) """ return self.reduce( duck_array_ops.count, @@ -1149,15 +973,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1214,15 +1031,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.any() - - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1285,21 +1095,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.max, @@ -1363,21 +1164,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.min() - - array(1.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.min, @@ -1445,21 +1237,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - - array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1534,27 +1317,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - - array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) - - array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1630,27 +1402,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - - array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) - - array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1723,27 +1484,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - - array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) - - array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1816,27 +1566,16 @@ def var( ... ), ... 
) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - - array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) - - array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1905,21 +1644,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.median, @@ -1982,24 +1712,12 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) int64 1 2 2 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -2067,24 +1785,12 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").all() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool False True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -2152,24 +1858,12 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").any() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool True True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -2243,34 +1937,16 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 3.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -2346,34 +2022,16 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").min() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").min(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 1.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -2453,34 +2111,16 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -2567,44 +2207,20 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").prod() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -2693,44 +2309,20 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -2816,44 +2408,20 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").std(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.414 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -2939,44 +2507,20 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 2.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -3058,31 +2602,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -3146,24 +2671,12 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").count() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 1 3 1 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -3231,24 +2744,12 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").all() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True False """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -3316,24 +2817,12 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -3407,34 +2896,16 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").max() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -3510,34 +2981,16 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").min() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").min(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -3617,34 +3070,16 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").mean() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").mean(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -3731,44 +3166,20 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").prod() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.resample(time="3M").prod(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -3857,44 +3268,20 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").sum() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -3980,44 +3367,20 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").std() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -4103,44 +3466,20 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").var() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. 
>>> ds.resample(time="3M").var(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -4222,31 +3561,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").median() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -4309,20 +3629,12 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").count() - - array([1, 2, 2]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -4387,20 +3699,12 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").all() - - array([False, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -4465,20 +3769,12 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").any() - - array([ True, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -4549,28 +3845,16 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").max() - - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) - - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -4643,28 +3927,16 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").min() - - array([nan, 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").min(skipna=False) - - array([nan, 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -4741,28 +4013,16 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").mean() - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -4846,36 +4106,20 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").prod() - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -4961,36 +4205,20 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").sum() - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -5073,36 +4301,20 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").std() - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) - - array([ nan, 0. , 1.41421356]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -5185,36 +4397,20 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").var() - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) - - array([nan, 0., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -5293,25 +4489,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -5373,17 +4556,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").count() - - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -5440,17 +4614,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").all() - - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -5507,17 +4672,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").any() - - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -5580,25 +4736,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").max() - - array([1., 3., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").max(skipna=False) - - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -5662,25 +4805,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").min() - - array([1., 1., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) - - array([ 1., 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -5748,25 +4878,12 @@ def mean( ... ), ... 
) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").mean() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").mean(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -5841,33 +4958,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").prod() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").prod(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -5943,33 +5043,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").sum() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").sum(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -6042,33 +5125,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").std() - - array([0. , 0.81649658, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) - - array([0. , 0.81649658, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -6141,33 +5207,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").var() - - array([0. , 0.66666667, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) - - array([0. , 0.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. 
>>> da.resample(time="3M").var(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -6236,25 +5285,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").median() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 8e30620a181..c02d9dde65d 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -327,6 +327,7 @@ def generate_code(self, method): else: return f""" if dask_groupby and OPTIONS["use_numpy_groupies"]: + raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="{method.name}", dim=dim,{extra_kwargs} From 47b593c481ecb93fe83dd6b9458d5683cc93119f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 09:10:39 -0700 Subject: [PATCH 050/138] Use conda-forge numpy_groupies in CI --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index e42c01c6106..8dd38f9fda3 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -22,6 +22,7 @@ dependencies: - netcdf4 - numba - numpy + - numpy_groupies - pandas - pint - pip @@ -43,5 +44,4 @@ dependencies: - zarr - pip: - numbagg - - numpy_groupies - git+https://github.com/dcherian/dask_groupby.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 5892f2bf108..f89be56395e 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -25,6 +25,7 @@ dependencies: - numba - numexpr - numpy + - numpy_groupies - pandas - pint - pip @@ -47,5 +48,4 @@ dependencies: - zarr - pip: - numbagg - - numpy_groupies - git+https://github.com/dcherian/dask_groupby.git From c7e9d9647f2c7df7e5b14644926dc2126ad4318f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 11:49:47 -0700 Subject: [PATCH 051/138] Minor improvement --- xarray/util/generate_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index fcb5a573c96..2ad65bab08b 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -280,7 +280,7 @@ def generate_code(self, method): extra_kwargs.append(f"numeric_only={method.numeric_only},") if extra_kwargs: - extra_kwargs = "\n " + "\n ".join(extra_kwargs) + extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 12 * " ") else: extra_kwargs = "" return f""" return self.reduce( From 77d2665458db1eec682c725e4dcdfef78454fc30 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 12:12:39 -0700 Subject: [PATCH 052/138] Revert "Force test failure to check CI env" This reverts commit 31e1fd2419f54205616d59eb1ee176e7a61e07b2. 
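For reference, a minimal sketch of the dispatch that each generated groupby/resample reduction performs once the forced failure is removed (an illustrative paraphrase, not a verbatim excerpt from the diff; the exact per-method argument lists are emitted by xarray/util/generate_reductions.py):

    def mean(self, dim=None, skipna=None, keep_attrs=None, **kwargs):
        # Fast path: dask_groupby (numpy_groupies-backed) is importable and
        # the new option is switched on.
        if dask_groupby and OPTIONS["use_numpy_groupies"]:
            return self._dask_groupby_reduce(
                func="mean",
                dim=dim,
                skipna=skipna,
                keep_attrs=keep_attrs,
                **kwargs,
            )
        # Fallback: the pre-existing reduction through duck_array_ops.
        return self.reduce(
            duck_array_ops.mean,
            dim=dim,
            skipna=skipna,
            keep_attrs=keep_attrs,
            **kwargs,
        )

The option is stored in xarray's OPTIONS dict under "use_numpy_groupies", so it would be enabled with xarray.set_options(use_numpy_groupies=True). A later patch in this series (PATCH 056) further restricts the fast path to objects that contain only numpy or dask arrays via contains_only_dask_or_numpy(self._obj).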
--- xarray/core/_reductions.py | 1024 +++++++++++++++++++++++++++- xarray/util/generate_reductions.py | 1 - 2 files changed, 994 insertions(+), 31 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 4df94ff42d3..85bd096cc18 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -98,8 +98,19 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + + Dimensions: () + Data variables: + da int64 5 """ return self.reduce( duck_array_ops.count, @@ -158,8 +169,19 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False """ return self.reduce( duck_array_ops.array_all, @@ -218,8 +240,19 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True """ return self.reduce( duck_array_ops.array_any, @@ -284,12 +317,27 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.max, @@ -355,12 +403,27 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + + Dimensions: () + Data variables: + da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.min, @@ -430,12 +493,27 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + + Dimensions: () + Data variables: + da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -512,16 +590,35 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + + Dimensions: () + Data variables: + da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -599,16 +696,35 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -683,16 +799,35 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -767,16 +902,35 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -847,12 +1001,27 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.median, @@ -915,8 +1084,15 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + + array(5) """ return self.reduce( duck_array_ops.count, @@ -973,8 +1149,15 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1031,8 +1214,15 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1095,12 +1285,21 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.max, @@ -1164,12 +1363,21 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.min, @@ -1237,12 +1445,21 @@ def mean( ... ), ... 
) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1317,16 +1534,27 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) + + array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1402,16 +1630,27 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) + + array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1484,16 +1723,27 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) + + array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1566,16 +1816,27 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) + + array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1644,12 +1905,21 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.median, @@ -1712,12 +1982,24 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 2 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -1785,12 +2067,24 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -1858,12 +2152,24 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -1937,16 +2243,34 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -2022,16 +2346,34 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -2111,16 +2453,34 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -2207,20 +2567,44 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -2309,20 +2693,44 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -2408,20 +2816,44 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -2507,20 +2939,44 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. 
>>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -2602,12 +3058,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -2671,12 +3146,24 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").count() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 1 """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -2744,12 +3231,24 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").all() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -2817,12 +3316,24 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").any() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -2896,16 +3407,34 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").max() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -2981,16 +3510,34 @@ def min( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").min() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").min(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -3070,16 +3617,34 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").mean() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").mean(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -3166,20 +3731,44 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").prod() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -3268,20 +3857,44 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").sum() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.resample(time="3M").sum(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -3367,20 +3980,44 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").std() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -3466,20 +4103,44 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").var() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -3561,12 +4222,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -3629,12 +4309,20 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.groupby("labels").count() + + array([1, 2, 2]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="count", dim=dim, @@ -3699,12 +4387,20 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="all", dim=dim, @@ -3769,12 +4465,20 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="any", dim=dim, @@ -3845,16 +4549,28 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="max", dim=dim, @@ -3927,16 +4643,28 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="min", dim=dim, @@ -4013,16 +4741,28 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="mean", dim=dim, @@ -4106,20 +4846,36 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="prod", dim=dim, @@ -4205,20 +4961,36 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="sum", dim=dim, @@ -4301,20 +5073,36 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="std", dim=dim, @@ -4397,20 +5185,36 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="var", dim=dim, @@ -4489,12 +5293,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -4556,8 +5373,17 @@ def count( ... ), ... 
) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").count() + + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -4614,8 +5440,17 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").all() + + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -4672,8 +5507,17 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").any() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -4736,12 +5580,25 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").max() + + array([1., 3., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").max(skipna=False) + + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -4805,12 +5662,25 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").min() + + array([1., 1., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) + + array([ 1., 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -4878,12 +5748,25 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").mean() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").mean(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -4958,16 +5841,33 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").prod() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> da.resample(time="3M").prod(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -5043,16 +5943,33 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").sum() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").sum(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -5125,16 +6042,33 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").std() + + array([0. , 0.81649658, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) + + array([0. , 0.81649658, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -5207,16 +6141,33 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").var() + + array([0. , 0.66666667, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) + + array([0. , 0.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -5285,12 +6236,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index c02d9dde65d..8e30620a181 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -327,7 +327,6 @@ def generate_code(self, method): else: return f""" if dask_groupby and OPTIONS["use_numpy_groupies"]: - raise ValueError("using numpy_groupies!") return self._dask_groupby_reduce( func="{method.name}", dim=dim,{extra_kwargs} From 11c3d3398a12b2abcd6fd1de0af689a5d673730f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 12:37:24 -0700 Subject: [PATCH 053/138] Fixed doctests in dask_groupby --- xarray/core/_reductions.py | 42 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 85bd096cc18..9298b25db9e 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2257,7 +2257,7 @@ def max( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 3.0 + da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2360,7 +2360,7 @@ def min( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 1.0 + da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2467,7 +2467,7 @@ def mean( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 2.0 + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2581,7 +2581,7 @@ def prod( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 4.0 3.0 + da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2707,7 +2707,7 @@ def sum( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 4.0 4.0 + da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2830,7 +2830,7 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 0.0 1.0 + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. @@ -2953,7 +2953,7 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 0.0 1.0 + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. @@ -3421,7 +3421,7 @@ def max( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 3.0 nan + da (time) float64 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. @@ -3524,7 +3524,7 @@ def min( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 nan + da (time) float64 1.0 1.0 2.0 Use ``skipna`` to control whether NaNs are ignored. @@ -3631,7 +3631,7 @@ def mean( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 2.0 nan + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
@@ -3745,7 +3745,7 @@ def prod( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 6.0 nan + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. @@ -3871,7 +3871,7 @@ def sum( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 6.0 nan + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. @@ -3994,7 +3994,7 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.8165 nan + da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. @@ -4117,7 +4117,7 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.6667 nan + da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. @@ -4557,7 +4557,7 @@ def max( >>> da.groupby("labels").max() - array([nan, 2., 3.]) + array([1., 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -4651,7 +4651,7 @@ def min( >>> da.groupby("labels").min() - array([nan, 2., 1.]) + array([1., 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -4749,7 +4749,7 @@ def mean( >>> da.groupby("labels").mean() - array([nan, 2., 2.]) + array([1., 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -4854,7 +4854,7 @@ def prod( >>> da.groupby("labels").prod() - array([nan, 4., 3.]) + array([1., 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -4969,7 +4969,7 @@ def sum( >>> da.groupby("labels").sum() - array([nan, 4., 4.]) + array([1., 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -5081,7 +5081,7 @@ def std( >>> da.groupby("labels").std() - array([nan, 0., 1.]) + array([0., 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -5193,7 +5193,7 @@ def var( >>> da.groupby("labels").var() - array([nan, 0., 1.]) + array([0., 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' From be53f13b1dd832a4ba221458f64f6c277998e9da Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 12:55:58 -0700 Subject: [PATCH 054/138] See if its an import error --- xarray/core/_reductions.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 9298b25db9e..1d694b34109 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -4,6 +4,8 @@ import sys from typing import Any, Callable, Hashable, Optional, Sequence, Union +import dask_groupby + from . import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset @@ -14,12 +16,6 @@ from typing_extensions import Protocol -try: - import dask_groupby -except ImportError: - dask_groupby = None - - class DatasetReduce(Protocol): def reduce( self, From 35908b590d04f7c66508d6dcea30e6ed8351ac64 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 10 Nov 2021 14:33:44 -0700 Subject: [PATCH 055/138] Revert "See if its an import error" This reverts commit be53f13b1dd832a4ba221458f64f6c277998e9da. --- xarray/core/_reductions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 1d694b34109..9298b25db9e 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -4,8 +4,6 @@ import sys from typing import Any, Callable, Hashable, Optional, Sequence, Union -import dask_groupby - from . 
import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset @@ -16,6 +14,12 @@ from typing_extensions import Protocol +try: + import dask_groupby +except ImportError: + dask_groupby = None + + class DatasetReduce(Protocol): def reduce( self, From 9c2cbb8df424ff8d6e6c6faae5abc24c15a4e84b Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 11 Nov 2021 13:25:50 -0700 Subject: [PATCH 056/138] Ppass through objects with only numpy or dask arrays --- xarray/core/utils.py | 18 ++++++++++++++++++ xarray/util/generate_reductions.py | 7 ++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ebf6d7e28ed..58a184171ed 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -936,3 +936,21 @@ def iterate_nested(nested_list): yield from iterate_nested(item) else: yield item + + +def contains_only_dask_or_numpy(obj) -> bool: + """Returns True if xarray object contains only numpy or dask arrays. + + Expects obj to be Dataset or DataArray""" + from .dataarray import DataArray + from .pycompat import is_duck_dask_array + + if isinstance(obj, DataArray): + obj = obj._to_temp_dataset() + + return all( + [ + isinstance(var.data, np.ndarray) or is_duck_dask_array(var.data) + for var in obj.variables.values() + ] + ) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 8e30620a181..6d5db3d1132 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -26,6 +26,7 @@ from . import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset +from .utils import contains_only_dask_or_numpy if sys.version_info >= (3, 8): from typing import Protocol @@ -326,7 +327,11 @@ def generate_code(self, method): else: return f""" - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="{method.name}", dim=dim,{extra_kwargs} From e9af57c28638288532b470334547223221e1a397 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 11 Nov 2021 13:36:07 -0700 Subject: [PATCH 057/138] Try fixing mypy --- xarray/core/_reductions.py | 317 ++++++++++++++++++++++------- xarray/util/generate_reductions.py | 36 +++- 2 files changed, 276 insertions(+), 77 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 9298b25db9e..dc9f89439f1 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2,11 +2,12 @@ # This file was generated using xarray.util.generate_reductions. Do not edit manually. import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Mapping, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset +from .utils import contains_only_dask_or_numpy if sys.version_info >= (3, 8): from typing import Protocol @@ -33,6 +34,29 @@ def reduce( ... +class DatasetGroupByReduce(Protocol): + _obj: T_Dataset + _dask_groupby_kwargs: Mapping + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_Dataset: + ... + + def _dask_groupby_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> T_Dataset: + ... 
+ + class DataArrayReduce(Protocol): def reduce( self, @@ -46,6 +70,29 @@ def reduce( ... +class DataArrayGroupByReduce(Protocol): + _obj: T_DataArray + _dask_groupby_kwargs: Mapping + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_DataArray: + ... + + def _dask_groupby_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> T_DataArray: + ... + + class DatasetReductions: __slots__ = () @@ -1934,7 +1981,7 @@ class DatasetGroupByReductions: __slots__ = () def count( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -1999,7 +2046,11 @@ def count( da (labels) int64 1 2 2 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="count", dim=dim, @@ -2019,7 +2070,7 @@ def count( ) def all( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -2084,7 +2135,11 @@ def all( da (labels) bool False True True """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="all", dim=dim, @@ -2104,7 +2159,7 @@ def all( ) def any( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -2169,7 +2224,11 @@ def any( da (labels) bool True True True """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="any", dim=dim, @@ -2189,7 +2248,7 @@ def any( ) def max( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -2270,7 +2329,11 @@ def max( da (labels) float64 nan 2.0 3.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="max", dim=dim, @@ -2292,7 +2355,7 @@ def max( ) def min( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -2373,7 +2436,11 @@ def min( da (labels) float64 nan 2.0 1.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="min", dim=dim, @@ -2395,7 +2462,7 @@ def min( ) def mean( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -2480,7 +2547,11 @@ def mean( da (labels) float64 nan 2.0 2.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="mean", dim=dim, @@ -2502,7 +2573,7 @@ def mean( ) def prod( - self: 
DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -2604,7 +2675,11 @@ def prod( da (labels) float64 nan 4.0 3.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="prod", dim=dim, @@ -2628,7 +2703,7 @@ def prod( ) def sum( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -2730,7 +2805,11 @@ def sum( da (labels) float64 nan 4.0 4.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="sum", dim=dim, @@ -2754,7 +2833,7 @@ def sum( ) def std( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -2853,7 +2932,11 @@ def std( da (labels) float64 nan 0.0 1.414 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="std", dim=dim, @@ -2877,7 +2960,7 @@ def std( ) def var( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -2976,7 +3059,11 @@ def var( da (labels) float64 nan 0.0 2.0 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="var", dim=dim, @@ -3000,7 +3087,7 @@ def var( ) def median( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -3098,7 +3185,7 @@ class DatasetResampleReductions: __slots__ = () def count( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -3163,7 +3250,11 @@ def count( da (time) int64 1 3 1 """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="count", dim=dim, @@ -3183,7 +3274,7 @@ def count( ) def all( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -3248,7 +3339,11 @@ def all( da (time) bool True True False """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="all", dim=dim, @@ -3268,7 +3363,7 @@ def all( ) def any( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -3333,7 +3428,11 @@ def any( da (time) bool True True True """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="any", dim=dim, @@ -3353,7 +3452,7 @@ def any( ) def max( - 
self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -3434,7 +3533,11 @@ def max( da (time) float64 1.0 3.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="max", dim=dim, @@ -3456,7 +3559,7 @@ def max( ) def min( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -3537,7 +3640,11 @@ def min( da (time) float64 1.0 1.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="min", dim=dim, @@ -3559,7 +3666,7 @@ def min( ) def mean( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -3644,7 +3751,11 @@ def mean( da (time) float64 1.0 2.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="mean", dim=dim, @@ -3666,7 +3777,7 @@ def mean( ) def prod( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -3768,7 +3879,11 @@ def prod( da (time) float64 nan 6.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="prod", dim=dim, @@ -3792,7 +3907,7 @@ def prod( ) def sum( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -3894,7 +4009,11 @@ def sum( da (time) float64 nan 6.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="sum", dim=dim, @@ -3918,7 +4037,7 @@ def sum( ) def std( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -4017,7 +4136,11 @@ def std( da (time) float64 nan 1.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="std", dim=dim, @@ -4041,7 +4164,7 @@ def std( ) def var( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -4140,7 +4263,11 @@ def var( da (time) float64 nan 1.0 nan """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="var", dim=dim, @@ -4164,7 +4291,7 @@ def var( ) def median( - self: DatasetReduce, + self: DatasetGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -4262,7 +4389,7 @@ class 
DataArrayGroupByReductions: __slots__ = () def count( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -4322,7 +4449,11 @@ def count( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="count", dim=dim, @@ -4340,7 +4471,7 @@ def count( ) def all( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -4400,7 +4531,11 @@ def all( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="all", dim=dim, @@ -4418,7 +4553,7 @@ def all( ) def any( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -4478,7 +4613,11 @@ def any( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="any", dim=dim, @@ -4496,7 +4635,7 @@ def any( ) def max( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -4570,7 +4709,11 @@ def max( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="max", dim=dim, @@ -4590,7 +4733,7 @@ def max( ) def min( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -4664,7 +4807,11 @@ def min( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="min", dim=dim, @@ -4684,7 +4831,7 @@ def min( ) def mean( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -4762,7 +4909,11 @@ def mean( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="mean", dim=dim, @@ -4782,7 +4933,7 @@ def mean( ) def prod( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -4875,7 +5026,11 @@ def prod( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="prod", dim=dim, @@ -4897,7 +5052,7 @@ def prod( ) def sum( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, 
Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -4990,7 +5145,11 @@ def sum( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="sum", dim=dim, @@ -5012,7 +5171,7 @@ def sum( ) def std( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -5102,7 +5261,11 @@ def std( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="std", dim=dim, @@ -5124,7 +5287,7 @@ def std( ) def var( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -5214,7 +5377,11 @@ def var( * labels (labels) object 'a' 'b' 'c' """ - if dask_groupby and OPTIONS["use_numpy_groupies"]: + if ( + dask_groupby + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): return self._dask_groupby_reduce( func="var", dim=dim, @@ -5236,7 +5403,7 @@ def var( ) def median( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -5326,7 +5493,7 @@ class DataArrayResampleReductions: __slots__ = () def count( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -5393,7 +5560,7 @@ def count( ) def all( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -5460,7 +5627,7 @@ def all( ) def any( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, @@ -5527,7 +5694,7 @@ def any( ) def max( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -5609,7 +5776,7 @@ def max( ) def min( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -5691,7 +5858,7 @@ def min( ) def mean( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, @@ -5777,7 +5944,7 @@ def mean( ) def prod( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -5879,7 +6046,7 @@ def prod( ) def sum( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, @@ -5981,7 +6148,7 @@ def sum( ) def std( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -6080,7 +6247,7 @@ def std( ) def var( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, @@ -6179,7 
+6346,7 @@ def var( ) def median( - self: DataArrayReduce, + self: DataArrayGroupByReduce, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 6d5db3d1132..7e3ddcbfc15 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -21,7 +21,7 @@ # This file was generated using xarray.util.generate_reductions. Do not edit manually. import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Mapping, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS @@ -50,6 +50,29 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, + ) -> T_{obj}: + ... + + +class {obj}GroupByReduce(Protocol): + _obj: T_{obj} + _dask_groupby_kwargs: Mapping + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_{obj}: + ... + + def _dask_groupby_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, ) -> T_{obj}: ...""" @@ -61,7 +84,7 @@ class {obj}{cls}Reductions: TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( - self: {obj}Reduce, + self: {self_type}, dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} keep_attrs: bool = None, **kwargs, @@ -190,6 +213,7 @@ def __init__( self, cls, datastructure, + self_type, methods, docref, docref_description, @@ -197,6 +221,7 @@ def __init__( see_also_obj=None, ): self.datastructure = datastructure + self.self_type = self_type self.cls = cls self.methods = methods self.docref = docref @@ -226,6 +251,7 @@ def generate_method(self, method): yield TEMPLATE_REDUCTION_SIGNATURE.format( **template_kwargs, extra_kwargs=extra_kwargs, + self_type=self.self_type, ) for text in [ @@ -396,6 +422,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", + self_type="DatasetReduce", ) DataArrayGenerator = GenericReductionGenerator( cls="", @@ -405,6 +432,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="Dataset", + self_type="DataArrayReduce", ) DataArrayGroupByGenerator = GroupByReductionGenerator( @@ -414,6 +442,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + self_type="DataArrayGroupByReduce", ) DataArrayResampleGenerator = GenericReductionGenerator( cls="Resample", @@ -422,6 +451,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', + self_type="DataArrayGroupByReduce", ) DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", @@ -430,6 +460,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + self_type="DatasetGroupByReduce", ) DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -438,6 +469,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', + self_type="DatasetGroupByReduce", ) From 415eb294af77b36f481f3c934baf617d1f55bb1e Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 12 Nov 
2021 15:47:10 -0700 Subject: [PATCH 058/138] Fix bug when binning by nD variable. --- xarray/core/groupby.py | 6 ++++-- xarray/tests/test_groupby.py | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 16590132204..bbf23f9109a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -264,7 +264,6 @@ class GroupBy: # Save unstacked object for dask_groupby "_original_obj", "_unstacked_group", - "_bins", ) def __init__( @@ -349,7 +348,10 @@ def __init__( new_dim_name = group.name + "_bins" group = DataArray(binned, group.coords, name=new_dim_name) full_index = binned.categories - self._unstacked_group = group + if stacked_dim is not None: + self._unstacked_group = group.unstack(stacked_dim) + else: + self._unstacked_group = group if grouper is not None: index = safe_cast_to_index(group) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 2d9ce04d16d..257dbb988bf 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1202,6 +1202,25 @@ def test_groupby_bins_multidim(self): actual = array.groupby_bins("lat", bins).map(lambda x: x.sum()) assert_identical(expected, actual) + bins = [-2, -1, 0, 1, 2] + field = DataArray(np.ones((5, 12)), dims=("x", "y")) + by = DataArray(np.random.randn(5, 12), dims=("x", "y")) + actual = field.groupby_bins(by, bins=bins).mean() + + bincoord = np.array( + [ + pd.Interval(left, right, closed="right") + for left, right in zip(bins[:-1], bins[1:]) + ], + dtype=np.object, + ) + expected = DataArray( + np.ones((4,)), + dims="group_bins", + coords={"group_bins": bincoord}, + ) + assert_identical(actual, expected) + def test_groupby_bins_sort(self): data = xr.DataArray( np.arange(100), dims="x", coords={"x": np.linspace(-100, 100, num=100)} From edbd376c792f54ade6a9447674b15a79f44c983b Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 14 Nov 2021 09:57:32 -0700 Subject: [PATCH 059/138] Fix binning and weird issues with precision and pd.cut --- xarray/core/groupby.py | 32 ++++++++++++++++++++++++-------- xarray/tests/test_groupby.py | 16 ++++++++++++---- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index bbf23f9109a..4b1114b1177 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -264,6 +264,7 @@ class GroupBy: # Save unstacked object for dask_groupby "_original_obj", "_unstacked_group", + "_bins", ) def __init__( @@ -327,6 +328,7 @@ def __init__( self._original_obj = obj self._unstacked_group = group + self._bins = bins group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj) (group_dim,) = group.dims @@ -344,14 +346,10 @@ def __init__( if bins is not None: if duck_array_ops.isnull(bins).all(): raise ValueError("All bin edges are NaN.") - binned = pd.cut(group.values, bins, **cut_kwargs) + binned, self._bins = pd.cut(group.values, bins, **cut_kwargs, retbins=True) new_dim_name = group.name + "_bins" group = DataArray(binned, group.coords, name=new_dim_name) full_index = binned.categories - if stacked_dim is not None: - self._unstacked_group = group.unstack(stacked_dim) - else: - self._unstacked_group = group if grouper is not None: index = safe_cast_to_index(group) @@ -577,16 +575,34 @@ def _dask_groupby_reduce(self, dim, **kwargs): else: group = self._unstacked_group - # TODO: Properly deal with bins here. 
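    # Reviewer sketch (not part of this hunk): the multi-dimensional binning
    # case that the new test in test_groupby.py exercises; the values below
    # are illustrative only. Grouping by an unnamed 2-D variable yields a
    # "group_bins" coordinate, as the added test asserts.
    import numpy as np
    import xarray as xr

    field = xr.DataArray(np.ones((5, 3)), dims=("x", "y"))
    by = xr.DataArray(np.linspace(-1.5, 1.5, 15).reshape(5, 3), dims=("x", "y"))
    counts = field.groupby_bins(by, bins=[-2, -1, 0, 1, 2]).count()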
+ # TODO: handle bins=N in dask_groupby + if self._bins is not None: + expected_groups = (self._bins,) + isbin = (True,) + else: + expected_groups = (self._unique_coord.values,) + isbin = False + result = xarray_reduce( self._original_obj, group, dim=dim, - expected_groups=(self._unique_coord.values,), + expected_groups=expected_groups, + isbin=isbin, **kwargs, ) - result = self._maybe_restore_empty_groups(result) + if self._bins is not None: + # bins provided to dask_groupby are at full precision + # the bin edge labels a default precision of 3 + # reassign to fix that. + new_coord = [ + pd.Interval(inter.left, inter.right) for inter in self._full_index + ] + result[self._group.name] = new_coord + + # TODO: delete? + # result = self._maybe_restore_empty_groups(result) # TODO: make this cleaner; the renaming happens in DatasetResample.map if self._unique_coord.name == "__resample_dim__": result = result.rename(dict(__resample_dim__=self._group_dim)) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 257dbb988bf..8dd4726f034 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1203,9 +1203,11 @@ def test_groupby_bins_multidim(self): assert_identical(expected, actual) bins = [-2, -1, 0, 1, 2] - field = DataArray(np.ones((5, 12)), dims=("x", "y")) - by = DataArray(np.random.randn(5, 12), dims=("x", "y")) - actual = field.groupby_bins(by, bins=bins).mean() + field = DataArray(np.ones((5, 3)), dims=("x", "y")) + by = DataArray( + np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3), dims=("x", "y") + ) + actual = field.groupby_bins(by, bins=bins).count() bincoord = np.array( [ @@ -1215,7 +1217,7 @@ def test_groupby_bins_multidim(self): dtype=np.object, ) expected = DataArray( - np.ones((4,)), + np.array([6, 0, 3, 6], dtype=int), dims="group_bins", coords={"group_bins": bincoord}, ) @@ -1228,6 +1230,12 @@ def test_groupby_bins_sort(self): binned_mean = data.groupby_bins("x", bins=11).mean() assert binned_mean.to_index().is_monotonic + with xr.set_options(use_numpy_groupies=True): + actual = data.groupby_bins("x", bins=11).count() + with xr.set_options(use_numpy_groupies=False): + expected = data.groupby_bins("x", bins=11).count() + assert_identical(actual, expected) + def test_groupby_assign_coords(self): array = DataArray([1, 2, 3, 4], {"c": ("x", [0, 0, 1, 1])}, dims="x") From c189eea36b3bb993451c2cb6765be4cd07449b67 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 14 Nov 2021 09:58:40 -0700 Subject: [PATCH 060/138] Fix upsampling with resample (these have "empty groups") --- xarray/core/groupby.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4b1114b1177..04787924efa 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -601,10 +601,9 @@ def _dask_groupby_reduce(self, dim, **kwargs): ] result[self._group.name] = new_coord - # TODO: delete? - # result = self._maybe_restore_empty_groups(result) - # TODO: make this cleaner; the renaming happens in DatasetResample.map if self._unique_coord.name == "__resample_dim__": + result = self._maybe_restore_empty_groups(result) + # TODO: make this cleaner; the renaming happens in DatasetResample.map result = result.rename(dict(__resample_dim__=self._group_dim)) return result From 43ade8cbdbcd2bf9fc950a6fc5f5d2078b56e86f Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 14 Nov 2021 20:32:06 -0700 Subject: [PATCH 061/138] "blockwise" need not be the best strategy for resample.. 
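
Reviewer note: a standalone illustration of the precision mismatch that the interval-relabelling added in PATCH 059 above works around; the toy values are an assumption, the behaviour shown is stock pandas:

    import numpy as np
    import pandas as pd

    x = np.linspace(-100, 100, num=100)
    binned, edges = pd.cut(x, bins=11, retbins=True)
    # `edges` are the full-precision bin edges handed to dask_groupby, while
    # the labels in `binned.categories` are rendered with pd.cut's default
    # precision=3. Rebuilding pd.Interval objects from the original categories
    # keeps the output coordinate identical to the non-accelerated code path:
    relabelled = [pd.Interval(iv.left, iv.right) for iv in binned.categories]
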
--- xarray/core/_reductions.py | 90 ++++++++++-------------------- xarray/core/groupby.py | 2 - xarray/core/resample.py | 2 - xarray/util/generate_reductions.py | 3 +- 4 files changed, 31 insertions(+), 66 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index dc9f89439f1..358b912ff9c 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2057,8 +2057,7 @@ def count( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2146,8 +2145,7 @@ def all( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2235,8 +2233,7 @@ def any( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2341,8 +2338,7 @@ def max( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2448,8 +2444,7 @@ def min( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2559,8 +2554,7 @@ def mean( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2688,8 +2682,7 @@ def prod( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2818,8 +2811,7 @@ def sum( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -2945,8 +2937,7 @@ def std( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3072,8 +3063,7 @@ def var( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3261,8 +3251,7 @@ def count( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3350,8 +3339,7 @@ def all( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3439,8 +3427,7 @@ def any( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3545,8 +3532,7 @@ def max( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3652,8 +3638,7 @@ def min( numeric_only=False, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3763,8 +3748,7 @@ def mean( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -3892,8 +3876,7 @@ def prod( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4022,8 +4005,7 @@ def sum( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4149,8 +4131,7 @@ def std( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4276,8 +4257,7 @@ def var( numeric_only=True, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4459,8 +4439,7 @@ def count( dim=dim, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4541,8 +4520,7 @@ def all( dim=dim, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4623,8 +4601,7 @@ def any( dim=dim, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4720,8 +4697,7 @@ def max( skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4818,8 +4794,7 @@ def min( skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -4920,8 +4895,7 @@ def mean( skipna=skipna, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -5038,8 +5012,7 @@ def prod( min_count=min_count, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -5157,8 +5130,7 @@ def sum( min_count=min_count, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -5273,8 +5245,7 @@ def std( ddof=ddof, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( @@ -5389,8 +5360,7 @@ def var( ddof=ddof, # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! 
- **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 04787924efa..b76e50a9f6a 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -259,7 +259,6 @@ class GroupBy: "_stacked_dim", "_unique_coord", "_dims", - "_dask_groupby_kwargs", "_squeeze", # Save unstacked object for dask_groupby "_original_obj", @@ -406,7 +405,6 @@ def __init__( self._inserted_dims = inserted_dims self._full_index = full_index self._restore_coord_dims = restore_coord_dims - self._dask_groupby_kwargs = {} self._squeeze = squeeze # self._by = by diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 4e1579ca109..6b5b7bd1935 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -175,8 +175,6 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): super().__init__(*args, **kwargs) - self._dask_groupby_kwargs = dict(method="blockwise") - def map(self, func, shortcut=False, args=(), **kwargs): """Apply a function to each array in the group and concatenate them together into a new array. diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 7e3ddcbfc15..93de314297f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -363,8 +363,7 @@ def generate_code(self, method): dim=dim,{extra_kwargs} # fill_value=fill_value, keep_attrs=keep_attrs, - # TODO: Add dask resampling reduction tests! - **self._dask_groupby_kwargs, + **kwargs, ) else: return self.reduce( From 03b7b31da98842a216801f422151bd4ab130d9cf Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 14 Nov 2021 20:39:48 -0700 Subject: [PATCH 062/138] one more bugfix --- xarray/core/resample.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 6b5b7bd1935..e2f599e8b4e 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -263,7 +263,6 @@ def __init__(self, *args, dim=None, resample_dim=None, **kwargs): self._resample_dim = resample_dim super().__init__(*args, **kwargs) - self._dask_groupby_kwargs = dict(method="blockwise") def map(self, func, args=(), shortcut=None, **kwargs): """Apply a function over each Dataset in the groups generated for From e038cc7a27449b693e19b2bac1003fb368466221 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 10:10:53 -0700 Subject: [PATCH 063/138] Fix dimension order when binning a dimension coordinate --- xarray/core/groupby.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index b76e50a9f6a..3a171e3a409 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -536,6 +536,8 @@ def _maybe_unstack(self, obj): def _dask_groupby_reduce(self, dim, **kwargs): from dask_groupby.xarray import xarray_reduce + from .dataset import Dataset + # TODO: fix this kwargs.pop("numeric_only", None) @@ -598,6 +600,11 @@ def _dask_groupby_reduce(self, dim, **kwargs): pd.Interval(inter.left, inter.right) for inter in self._full_index ] result[self._group.name] = new_coord + # Fix dimension order when binning a dimension coordinate + # Needed as long as we do a separate code path for pint; + # For some reason Datasets and DataArrays behave differently! + if isinstance(self._obj, Dataset) and self._group_dim in self._obj.dims: + result = result.transpose(self._group.name, ...) 
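    # Reviewer sketch (not part of this hunk): the transpose-with-Ellipsis
    # idiom used in the fix above. `...` stands for "all remaining dimensions
    # in their existing order", so the bin dimension can be moved first
    # without naming every other dimension. Toy dataset is illustrative only.
    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": (("x", "y"), np.ones((4, 3)))})
    ds.transpose("y", ...)  # "a" now has dims ("y", "x")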
if self._unique_coord.name == "__resample_dim__": result = self._maybe_restore_empty_groups(result) From 1f370f65f394256b16841d812557e8867ae43cbe Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 14:30:03 -0700 Subject: [PATCH 064/138] silence warning --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8dd4726f034..bacdd3e1b8c 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1214,7 +1214,7 @@ def test_groupby_bins_multidim(self): pd.Interval(left, right, closed="right") for left, right in zip(bins[:-1], bins[1:]) ], - dtype=np.object, + dtype=object, ) expected = DataArray( np.array([6, 0, 3, 6], dtype=int), From 7375dd4d6c6f8fafb771e99e0c9f27c79aad0d9b Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 14:54:23 -0700 Subject: [PATCH 065/138] fix test. --- xarray/core/groupby.py | 9 +++++++++ xarray/tests/test_groupby.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3a171e3a409..3d438048e2f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -579,6 +579,15 @@ def _dask_groupby_reduce(self, dim, **kwargs): if self._bins is not None: expected_groups = (self._bins,) isbin = (True,) + # This is an annoying hack. Xarray returns np.nan + # when there are no observations in a bin, instead of 0. + # We can fake that here by forcing min_count=1. + if kwargs["func"] == "count": + if "fill_value" not in kwargs or kwargs["fill_value"] is None: + kwargs["fill_value"] = np.nan + # note min_count makes no sense in the xarray world + # as a kwarg for count, so this should be OK + kwargs["min_count"] = 1 else: expected_groups = (self._unique_coord.values,) isbin = False diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index bacdd3e1b8c..b284808e0dd 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1217,7 +1217,7 @@ def test_groupby_bins_multidim(self): dtype=object, ) expected = DataArray( - np.array([6, 0, 3, 6], dtype=int), + np.array([6, np.nan, 3, 6]), dims="group_bins", coords={"group_bins": bincoord}, ) From 860f7be3cc8af1cfdc586dc6d466399ed56e4d65 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 15:22:02 -0700 Subject: [PATCH 066/138] add extra test --- xarray/tests/test_groupby.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index b284808e0dd..f188c45698f 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1177,6 +1177,14 @@ def test_groupby_bins(self): # make sure original array dims are unchanged assert len(array.dim_0) == 4 + da = xr.DataArray(np.ones((2, 3, 4))) + bins = [-1, 0, 1, 2] + with xr.set_options(use_numpy_groupies=False): + actual = da.groupby_bins("dim_0", bins).mean(...) + with xr.set_options(use_numpy_groupies=True): + expected = da.groupby_bins("dim_0", bins).mean(...) 
+ assert_allclose(actual, expected) + def test_groupby_bins_empty(self): array = DataArray(np.arange(4), [("x", range(4))]) # one of these bins will be empty From ced9034d99e618c70ba397e3aee5f53a2e475fd0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 15:29:38 -0700 Subject: [PATCH 067/138] Update upstream-dev env --- ci/install-upstream-wheels.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index b0b45dd1cb9..63641ecf6d7 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -14,6 +14,7 @@ conda uninstall -y --force \ pint \ bottleneck \ sparse \ + dask_groupby \ xarray # to limit the runtime of Upstream CI python -m pip install pytest-timeout @@ -43,4 +44,5 @@ python -m pip install \ git+https://github.com/pydata/bottleneck \ git+https://github.com/pydata/sparse \ git+https://github.com/intake/filesystem_spec \ - git+https://github.com/SciTools/nc-time-axis + git+https://github.com/SciTools/nc-time-axis \ + git+https://github.com/dcherian/dask_groupby From b269439cac3f58be18c02007b53d2cfbddd77cd8 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 15 Nov 2021 17:54:27 -0700 Subject: [PATCH 068/138] [test-upstream] Revert setting npg option in benchmarks --- asv_bench/benchmarks/groupby.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 4b08014ac37..62319ce6946 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -22,17 +22,15 @@ def setup(self, *args, **kwargs): def time_init(self, ndim): getattr(self, f"ds{ndim}d").groupby("b") - @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) - def time_agg_small_num_groups(self, method, ndim, npg): + @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) + def time_agg_small_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") - with xr.set_options(use_numpy_groupies=npg): - getattr(ds.groupby("a"), method)() + getattr(ds.groupby("a"), method)() - @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) - def time_agg_large_num_groups(self, method, ndim, npg): + @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) + def time_agg_large_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") - with xr.set_options(use_numpy_groupies=npg): - getattr(ds.groupby("b"), method)() + getattr(ds.groupby("b"), method)() class GroupByDask(GroupBy): @@ -82,17 +80,15 @@ def setup(self, *args, **kwargs): def time_init(self, ndim): getattr(self, f"ds{ndim}d").resample(time="D") - @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) - def time_agg_small_num_groups(self, method, ndim, npg): + @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) + def time_agg_small_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") - with xr.set_options(use_numpy_groupies=npg): - getattr(ds.resample(time="3M"), method)() + getattr(ds.resample(time="3M"), method)() - @parameterized(["method", "ndim", "npg"], [("sum", "mean"), (1, 2), [True, False]]) - def time_agg_large_num_groups(self, method, ndim, npg): + @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)]) + def time_agg_large_num_groups(self, method, ndim): ds = getattr(self, f"ds{ndim}d") - with xr.set_options(use_numpy_groupies=npg): - getattr(ds.resample(time="48H"), method)() + 
getattr(ds.resample(time="48H"), method)() class ResampleDask(Resample): From cc8abfe28a426afe56d289307b7a9818d52c7d40 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 16 Nov 2021 10:13:05 -0700 Subject: [PATCH 069/138] [test-upstream] Rename to flox --- asv_bench/asv.conf.json | 2 +- ci/install-upstream-wheels.sh | 4 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- setup.cfg | 2 +- xarray/core/_reductions.py | 132 ++++++++++++------------ xarray/core/groupby.py | 6 +- xarray/core/options.py | 2 +- xarray/util/generate_reductions.py | 15 ++- 9 files changed, 82 insertions(+), 85 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 9eb81b2c166..0d9ce0d51a3 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -65,7 +65,7 @@ "bottleneck": ["", null], "dask": [""], "distributed": [""], - "dask_groupby": [""], + "flox": [""], "numpy_groupies": [""], }, diff --git a/ci/install-upstream-wheels.sh b/ci/install-upstream-wheels.sh index 63641ecf6d7..0f41e20b0af 100755 --- a/ci/install-upstream-wheels.sh +++ b/ci/install-upstream-wheels.sh @@ -14,7 +14,7 @@ conda uninstall -y --force \ pint \ bottleneck \ sparse \ - dask_groupby \ + flox \ xarray # to limit the runtime of Upstream CI python -m pip install pytest-timeout @@ -45,4 +45,4 @@ python -m pip install \ git+https://github.com/pydata/sparse \ git+https://github.com/intake/filesystem_spec \ git+https://github.com/SciTools/nc-time-axis \ - git+https://github.com/dcherian/dask_groupby + git+https://github.com/dcherian/flox diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index 8dd38f9fda3..56da5db4249 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -44,4 +44,4 @@ dependencies: - zarr - pip: - numbagg - - git+https://github.com/dcherian/dask_groupby.git + - git+https://github.com/dcherian/flox.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index f89be56395e..0988dd22ee2 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -48,4 +48,4 @@ dependencies: - zarr - pip: - numbagg - - git+https://github.com/dcherian/dask_groupby.git + - git+https://github.com/dcherian/flox.git diff --git a/setup.cfg b/setup.cfg index f380892b204..e5df55451fa 100644 --- a/setup.cfg +++ b/setup.cfg @@ -101,7 +101,7 @@ accel = bottleneck numbagg numpy_groupies - dask_groupby + flox parallel = dask[complete] diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 358b912ff9c..db403f71cac 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2,7 +2,7 @@ # This file was generated using xarray.util.generate_reductions. Do not edit manually. import sys -from typing import Any, Callable, Hashable, Mapping, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS @@ -16,9 +16,9 @@ try: - import dask_groupby + import flox except ImportError: - dask_groupby = None + flox = None class DatasetReduce(Protocol): @@ -36,7 +36,6 @@ def reduce( class DatasetGroupByReduce(Protocol): _obj: T_Dataset - _dask_groupby_kwargs: Mapping def reduce( self, @@ -49,7 +48,7 @@ def reduce( ) -> T_Dataset: ... 
- def _dask_groupby_reduce( + def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, @@ -72,7 +71,6 @@ def reduce( class DataArrayGroupByReduce(Protocol): _obj: T_DataArray - _dask_groupby_kwargs: Mapping def reduce( self, @@ -85,7 +83,7 @@ def reduce( ) -> T_DataArray: ... - def _dask_groupby_reduce( + def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, @@ -2047,11 +2045,11 @@ def count( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="count", dim=dim, numeric_only=False, @@ -2135,11 +2133,11 @@ def all( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="all", dim=dim, numeric_only=False, @@ -2223,11 +2221,11 @@ def any( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="any", dim=dim, numeric_only=False, @@ -2327,11 +2325,11 @@ def max( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="max", dim=dim, skipna=skipna, @@ -2433,11 +2431,11 @@ def min( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="min", dim=dim, skipna=skipna, @@ -2543,11 +2541,11 @@ def mean( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="mean", dim=dim, skipna=skipna, @@ -2670,11 +2668,11 @@ def prod( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="prod", dim=dim, skipna=skipna, @@ -2799,11 +2797,11 @@ def sum( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="sum", dim=dim, skipna=skipna, @@ -2925,11 +2923,11 @@ def std( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="std", dim=dim, skipna=skipna, @@ -3051,11 +3049,11 @@ def var( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="var", dim=dim, skipna=skipna, @@ -3241,11 +3239,11 @@ def count( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="count", dim=dim, numeric_only=False, @@ -3329,11 +3327,11 @@ def all( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="all", dim=dim, numeric_only=False, @@ -3417,11 +3415,11 @@ def any( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return 
self._dask_groupby_reduce( + return self._flox_reduce( func="any", dim=dim, numeric_only=False, @@ -3521,11 +3519,11 @@ def max( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="max", dim=dim, skipna=skipna, @@ -3627,11 +3625,11 @@ def min( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="min", dim=dim, skipna=skipna, @@ -3737,11 +3735,11 @@ def mean( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="mean", dim=dim, skipna=skipna, @@ -3864,11 +3862,11 @@ def prod( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="prod", dim=dim, skipna=skipna, @@ -3993,11 +3991,11 @@ def sum( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="sum", dim=dim, skipna=skipna, @@ -4119,11 +4117,11 @@ def std( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="std", dim=dim, skipna=skipna, @@ -4245,11 +4243,11 @@ def var( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="var", dim=dim, skipna=skipna, @@ -4430,11 +4428,11 @@ def count( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="count", dim=dim, # fill_value=fill_value, @@ -4511,11 +4509,11 @@ def all( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="all", dim=dim, # fill_value=fill_value, @@ -4592,11 +4590,11 @@ def any( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="any", dim=dim, # fill_value=fill_value, @@ -4687,11 +4685,11 @@ def max( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="max", dim=dim, skipna=skipna, @@ -4784,11 +4782,11 @@ def min( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="min", dim=dim, skipna=skipna, @@ -4885,11 +4883,11 @@ def mean( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="mean", dim=dim, skipna=skipna, @@ -5001,11 +4999,11 @@ def prod( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="prod", dim=dim, skipna=skipna, 
@@ -5119,11 +5117,11 @@ def sum( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="sum", dim=dim, skipna=skipna, @@ -5234,11 +5232,11 @@ def std( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="std", dim=dim, skipna=skipna, @@ -5349,11 +5347,11 @@ def var( """ if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="var", dim=dim, skipna=skipna, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3d438048e2f..80b6360c0dc 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -260,7 +260,7 @@ class GroupBy: "_unique_coord", "_dims", "_squeeze", - # Save unstacked object for dask_groupby + # Save unstacked object for flox "_original_obj", "_unstacked_group", "_bins", @@ -533,8 +533,8 @@ def _maybe_unstack(self, obj): obj._indexes = propagate_indexes(obj._indexes, exclude=self._inserted_dims) return obj - def _dask_groupby_reduce(self, dim, **kwargs): - from dask_groupby.xarray import xarray_reduce + def _flox_reduce(self, dim, **kwargs): + from flox.xarray import xarray_reduce from .dataset import Dataset diff --git a/xarray/core/options.py b/xarray/core/options.py index df640d16025..20ec34e764e 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -187,7 +187,7 @@ class set_options: Whether to use ``bottleneck`` to accelerate 1D reductions and 1D rolling reduction operations. use_numpy_groupies : bool, default: True - Whether to use ``numpy_groupies`` and ``dask_groupby`` to + Whether to use ``numpy_groupies`` and ``flox`` to accelerate groupby and resampling reductions. warn_for_unclosed_files : bool, default: False Whether or not to issue a warning when unclosed files are diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 93de314297f..ab4f3acea7f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -21,7 +21,7 @@ # This file was generated using xarray.util.generate_reductions. Do not edit manually. import sys -from typing import Any, Callable, Hashable, Mapping, Optional, Sequence, Union +from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS @@ -35,9 +35,9 @@ try: - import dask_groupby + import flox except ImportError: - dask_groupby = None''' + flox = None''' OBJ_PREAMBLE = """ @@ -56,7 +56,6 @@ def reduce( class {obj}GroupByReduce(Protocol): _obj: T_{obj} - _dask_groupby_kwargs: Mapping def reduce( self, @@ -69,7 +68,7 @@ def reduce( ) -> T_{obj}: ... 
- def _dask_groupby_reduce( + def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, @@ -332,7 +331,7 @@ def generate_code(self, method): if self.datastructure.numeric_only: extra_kwargs.append(f"numeric_only={method.numeric_only},") - # numpy_groupies & dask_groupby do not support median + # numpy_groupies & flox do not support median if method.name == "median": indent = 12 else: @@ -354,11 +353,11 @@ def generate_code(self, method): else: return f""" if ( - dask_groupby + flox and OPTIONS["use_numpy_groupies"] and contains_only_dask_or_numpy(self._obj) ): - return self._dask_groupby_reduce( + return self._flox_reduce( func="{method.name}", dim=dim,{extra_kwargs} # fill_value=fill_value, From 033f5b590fb4df4acadff70515fd3c14b0f10935 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 17 Nov 2021 20:24:10 -0700 Subject: [PATCH 070/138] Add to print_versions --- xarray/util/print_versions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 561126ea05f..b8689e3a18f 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -122,6 +122,8 @@ def show_versions(file=sys.stdout): ("cupy", lambda mod: mod.__version__), ("pint", lambda mod: mod.__version__), ("sparse", lambda mod: mod.__version__), + ("flox", lambda mod: mod.__version__), + ("numpy_groupies", lambda mod: mod.__version__), # xarray setup/test ("setuptools", lambda mod: mod.__version__), ("pip", lambda mod: mod.__version__), From bd24db4881646aba2c0466e93d6989c187aaae98 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 17 Nov 2021 20:24:48 -0700 Subject: [PATCH 071/138] Add to all-but-dask --- ci/requirements/py38-all-but-dask.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml index 688dfb7a2bc..4654e21d3a9 100644 --- a/ci/requirements/py38-all-but-dask.yml +++ b/ci/requirements/py38-all-but-dask.yml @@ -13,6 +13,7 @@ dependencies: - cfgrib - cftime - coveralls + - flox - h5netcdf - h5py - hdf5 @@ -23,6 +24,7 @@ dependencies: - netcdf4 - numba - numpy + - numpy_groupies - pandas - pint - pip From 098467d3406c3de0cb7e2aaf38639f7ea74800ed Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 18 Nov 2021 07:09:17 -0700 Subject: [PATCH 072/138] Force failure to make sure CI is working. --- xarray/core/_reductions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index db403f71cac..e8754326bb5 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -15,10 +15,7 @@ from typing_extensions import Protocol -try: - import flox -except ImportError: - flox = None +import flox class DatasetReduce(Protocol): From a282ad47362111da4efc637ccdaed01824a2e39c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 18 Nov 2021 14:11:30 +0000 Subject: [PATCH 073/138] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/_reductions.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index e8754326bb5..b1aece73224 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -4,6 +4,8 @@ import sys from typing import Any, Callable, Hashable, Optional, Sequence, Union +import flox + from . 
import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset @@ -15,9 +17,6 @@ from typing_extensions import Protocol -import flox - - class DatasetReduce(Protocol): def reduce( self, From 8f23310e68c1a08c318471cdcd9d9b5b61d236a4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 19 Nov 2021 14:50:01 -0700 Subject: [PATCH 074/138] Revert "Force failure to make sure CI is working." This reverts commit 098467d3406c3de0cb7e2aaf38639f7ea74800ed. --- xarray/core/_reductions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index b1aece73224..db403f71cac 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -4,8 +4,6 @@ import sys from typing import Any, Callable, Hashable, Optional, Sequence, Union -import flox - from . import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset @@ -17,6 +15,12 @@ from typing_extensions import Protocol +try: + import flox +except ImportError: + flox = None + + class DatasetReduce(Protocol): def reduce( self, From 5dcb5bfebe04302beb732259f3e805bd31691ed8 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 12:30:50 +0100 Subject: [PATCH 075/138] Attempt fixing typing errors Mixing in DatasetReduce fixes: xarray/tests/test_groupby.py:460: error: Invalid self argument "Dataset" to attribute function "mean" with type "Callable[[DatasetReduce, Optional[Hashable], Optional[bool], Optional[bool], KwArg(Any)], T_Dataset]" [misc] Switching to "Dateset" as returned type fixes: xarray/tests/test_groupby.py:77: error: Need type annotation for "expected" [var-annotated] --- xarray/util/generate_reductions.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 2ad65bab08b..ac3a1b500a0 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -21,15 +21,18 @@ # This file was generated using xarray.util.generate_reductions. Do not edit manually. import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset if sys.version_info >= (3, 8): from typing import Protocol else: - from typing_extensions import Protocol''' + from typing_extensions import Protocol + +if TYPE_CHECKING: + from .dataset import Dataset + from.dataarray import DataArray''' OBJ_PREAMBLE = """ @@ -42,22 +45,22 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_{obj}: + ) -> "{obj}": ...""" CLASS_PREAMBLE = """ -class {obj}{cls}Reductions: +class {obj}{cls}Reductions({obj}Reduce): __slots__ = ()""" TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( - self: {obj}Reduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} keep_attrs: bool = None, **kwargs, - ) -> T_{obj}: + ) -> "{obj}": """ Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). From 411d75d5ced18349968c060918e9bdcd4be04537 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 18:00:08 +0100 Subject: [PATCH 076/138] Now get normal code running as well Protocols are not needed anymore when subclassing/defining directly in the class. 
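As a rough illustration of that point — a simplified, self-contained sketch with
stand-in names (Reductions, GroupByBase, Resample), not the actual generated
xarray code:

    # Stand-in for a generated mixin such as DatasetResampleReductions: it
    # defines its reduction methods against a dummy ``reduce`` stub instead of
    # annotating ``self`` with a Protocol.
    class Reductions:
        __slots__ = ()

        def reduce(self, func, dim=None, **kwargs):
            # Dummy implementation; the concrete class is expected to override it.
            return NotImplemented

        def mean(self, dim=None, **kwargs):
            return self.reduce("mean", dim, **kwargs)

    # Stand-in for DatasetGroupByBase, which provides the real reduce().
    class GroupByBase:
        def reduce(self, func, dim=None, **kwargs):
            return f"reduced with {func!r} over {dim!r}"

    # Listing GroupByBase before Reductions puts the real reduce() ahead of the
    # stub in the MRO, so the generated mean() dispatches to it.  With the bases
    # reversed, mean() would hit the stub and return NotImplemented.
    class Resample(GroupByBase, Reductions):
        pass

    print(Resample().mean(dim="time"))  # reduced with 'mean' over 'time'
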
When adding a dummy method in DatasetResampleReductions the order of subclassing had to be changed so the correct reduce was used. --- xarray/core/_reductions.py | 1347 ++++------------------------ xarray/core/resample.py | 4 +- xarray/util/generate_reductions.py | 25 +- 3 files changed, 204 insertions(+), 1172 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 006f4e35a09..e17b9dbd6b8 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,32 +1,18 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol +if TYPE_CHECKING: + from .dataset import Dataset + from .dataarray import DataArray -class DatasetReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - +class DatasetReductions: + __slots__ = () -class DataArrayReduce(Protocol): def reduce( self, func: Callable[..., Any], @@ -35,19 +21,15 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - -class DatasetReductions: - __slots__ = () + ) -> "Dataset": + return NotImplemented def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -91,19 +73,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.count() - - Dimensions: () - Data variables: - da int64 5 """ return self.reduce( duck_array_ops.count, @@ -114,11 +85,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -162,19 +133,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.all() - - Dimensions: () - Data variables: - da bool False """ return self.reduce( duck_array_ops.array_all, @@ -185,11 +145,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -233,19 +193,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.any() - - Dimensions: () - Data variables: - da bool True """ return self.reduce( duck_array_ops.array_any, @@ -256,12 +205,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -310,27 +259,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.max() - - Dimensions: () - Data variables: - da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.max, @@ -342,12 +276,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -396,27 +330,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.min() - - Dimensions: () - Data variables: - da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.min, @@ -428,12 +347,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -486,27 +405,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.mean() - - Dimensions: () - Data variables: - da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -518,13 +422,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -583,35 +487,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.prod() - - Dimensions: () - Data variables: - da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.prod(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -624,13 +509,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -689,35 +574,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.sum() - - Dimensions: () - Data variables: - da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -730,13 +596,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -792,35 +658,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.std() - - Dimensions: () - Data variables: - da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -833,13 +680,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -895,35 +742,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.var() - - Dimensions: () - Data variables: - da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -936,12 +764,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -994,27 +822,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.median() - - Dimensions: () - Data variables: - da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.median(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.median, @@ -1029,12 +842,23 @@ def median( class DataArrayReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + return NotImplemented + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -1077,15 +901,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - - array(5) """ return self.reduce( duck_array_ops.count, @@ -1095,11 +912,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -1142,15 +959,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1160,11 +970,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -1207,15 +1017,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.any() - - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1225,12 +1028,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -1278,21 +1081,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.max, @@ -1303,12 +1097,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -1356,21 +1150,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.min() - - array(1.) Use ``skipna`` to control whether NaNs are ignored. 
>>> da.min(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.min, @@ -1381,12 +1166,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -1438,21 +1223,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - - array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1463,13 +1239,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -1527,27 +1303,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - - array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) - - array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1559,13 +1324,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -1623,27 +1388,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - - array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) - - array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1655,13 +1409,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -1716,27 +1470,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - - array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) - - array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1748,13 +1491,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -1809,27 +1552,16 @@ def var( ... ), ... 
) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - - array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) - - array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1841,12 +1573,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -1898,21 +1630,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.median, @@ -1926,12 +1649,23 @@ def median( class DatasetGroupByReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + return NotImplemented + def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -1975,21 +1709,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) int64 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -2000,11 +1721,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -2048,21 +1769,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").all() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool False True True """ return self.reduce( duck_array_ops.array_all, @@ -2073,11 +1781,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -2121,21 +1829,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").any() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -2146,12 +1841,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -2200,31 +1895,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 3.0 """ return self.reduce( duck_array_ops.max, @@ -2236,12 +1912,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -2290,31 +1966,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").min() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 1.0 """ return self.reduce( duck_array_ops.min, @@ -2326,12 +1983,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -2384,31 +2041,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.mean, @@ -2420,13 +2058,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -2485,41 +2123,16 @@ def prod( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").prod() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 """ return self.reduce( duck_array_ops.prod, @@ -2532,13 +2145,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -2597,41 +2210,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 """ return self.reduce( duck_array_ops.sum, @@ -2644,13 +2232,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -2706,41 +2294,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. 
>>> ds.groupby("labels").std(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, @@ -2753,13 +2316,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -2815,41 +2378,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, @@ -2862,12 +2400,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -2920,31 +2458,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -2959,12 +2478,23 @@ def median( class DatasetResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + return NotImplemented + def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -3008,21 +2538,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3M").count() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 1 3 1 """ return self.reduce( duck_array_ops.count, @@ -3033,11 +2550,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -3081,21 +2598,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").all() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True False """ return self.reduce( duck_array_ops.array_all, @@ -3106,11 +2610,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -3154,21 +2658,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -3179,12 +2670,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -3233,31 +2724,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").max() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 nan """ return self.reduce( duck_array_ops.max, @@ -3269,12 +2741,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -3323,31 +2795,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").min() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.resample(time="3M").min(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 nan """ return self.reduce( duck_array_ops.min, @@ -3359,12 +2812,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -3417,31 +2870,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").mean() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").mean(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.mean, @@ -3453,13 +2887,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -3518,41 +2952,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").prod() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.prod, @@ -3565,13 +2974,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -3630,41 +3039,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").sum() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.resample(time="3M").sum(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.sum, @@ -3677,13 +3061,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -3739,41 +3123,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").std() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, @@ -3786,13 +3145,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -3848,41 +3207,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").var() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").var(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, @@ -3895,12 +3229,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -3953,31 +3287,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3M").median() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -3992,12 +3307,23 @@ def median( class DataArrayGroupByReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + return NotImplemented + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4040,17 +3366,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").count() - - array([1, 2, 2]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.count, @@ -4060,11 +3377,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -4107,17 +3424,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").all() - - array([False, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_all, @@ -4127,11 +3435,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -4174,17 +3482,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").any() - - array([ True, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_any, @@ -4194,12 +3493,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -4247,25 +3546,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").max() - - array([1., 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").max(skipna=False) - - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.max, @@ -4276,12 +3562,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -4329,25 +3615,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").min() - - array([1., 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) - - array([nan, 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.min, @@ -4358,12 +3631,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -4415,25 +3688,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").mean() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.mean, @@ -4444,13 +3704,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -4508,33 +3768,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").prod() - - array([1., 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.prod, @@ -4546,13 +3789,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -4610,33 +3853,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").sum() - - array([1., 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.sum, @@ -4648,13 +3874,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -4709,33 +3935,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").std() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) - - array([ nan, 0. , 1.41421356]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, @@ -4747,13 +3956,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -4808,33 +4017,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").var() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) - - array([nan, 0., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, @@ -4846,12 +4038,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -4903,25 +4095,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").median(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -4935,12 +4114,23 @@ def median( class DataArrayResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + return NotImplemented + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4983,17 +4173,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").count() - - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -5003,11 +4184,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -5050,17 +4231,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").all() - - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -5070,11 +4242,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -5117,17 +4289,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").any() - - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -5137,12 +4300,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -5190,25 +4353,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").max() - - array([1., 3., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3M").max(skipna=False) - - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -5219,12 +4369,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -5272,25 +4422,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").min() - - array([1., 1., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) - - array([ 1., 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -5301,12 +4438,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -5358,25 +4495,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").mean() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").mean(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -5387,13 +4511,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -5451,33 +4575,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").prod() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").prod(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -5489,13 +4596,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -5553,33 +4660,16 @@ def sum( ... ), ... 
) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").sum() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").sum(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -5591,13 +4681,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -5652,33 +4742,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").std() - - array([0. , 0.81649658, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) - - array([0. , 0.81649658, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -5690,13 +4763,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -5751,33 +4824,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").var() - - array([0. , 0.66666667, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) - - array([0. , 0.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").var(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -5789,12 +4845,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -5846,25 +4902,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.resample(time="3M").median() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, diff --git a/xarray/core/resample.py b/xarray/core/resample.py index e2f599e8b4e..ad11550372c 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -157,7 +157,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -248,7 +248,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index ac3a1b500a0..ea479ccc9c3 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -20,23 +20,20 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - if TYPE_CHECKING: from .dataset import Dataset - from.dataarray import DataArray''' + from .dataarray import DataArray''' + + +CLASS_PREAMBLE = """ -OBJ_PREAMBLE = """ +class {obj}{cls}Reductions: + __slots__ = () -class {obj}Reduce(Protocol): def reduce( self, func: Callable[..., Any], @@ -46,13 +43,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ...""" - - -CLASS_PREAMBLE = """ - -class {obj}{cls}Reductions({obj}Reduce): - __slots__ = ()""" + return NotImplemented""" TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( @@ -388,8 +379,6 @@ class DataStructure: if __name__ == "__main__": print(MODULE_PREAMBLE) - print(OBJ_PREAMBLE.format(obj="Dataset")) - print(OBJ_PREAMBLE.format(obj="DataArray")) for gen in [ DatasetGenerator, DataArrayGenerator, From 6a9a1240aa95d71ef2081f6e98152981f9db336d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 20 Nov 2021 17:02:07 +0000 Subject: [PATCH 077/138] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index e17b9dbd6b8..f280f87434c 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -6,8 +6,8 @@ from . 
import duck_array_ops if TYPE_CHECKING: - from .dataset import Dataset from .dataarray import DataArray + from .dataset import Dataset class DatasetReductions: From dd28a57f66188db47a05fad184519295d688213d Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 20 Nov 2021 10:57:22 -0700 Subject: [PATCH 078/138] updates --- xarray/core/_reductions.py | 994 +++++++++++++++++++++++++++++ xarray/util/generate_reductions.py | 6 +- 2 files changed, 997 insertions(+), 3 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index f280f87434c..6b32c60fbaf 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -73,8 +73,19 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + + Dimensions: () + Data variables: + da int64 5 """ return self.reduce( duck_array_ops.count, @@ -133,8 +144,19 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False """ return self.reduce( duck_array_ops.array_all, @@ -193,8 +215,19 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True """ return self.reduce( duck_array_ops.array_any, @@ -259,12 +292,27 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.max, @@ -330,12 +378,27 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + + Dimensions: () + Data variables: + da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.min, @@ -405,12 +468,27 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + + Dimensions: () + Data variables: + da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -487,16 +565,35 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + + Dimensions: () + Data variables: + da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.prod(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -574,16 +671,35 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -658,16 +774,35 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -742,16 +877,35 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -822,12 +976,27 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.median, @@ -901,8 +1070,15 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + + array(5) """ return self.reduce( duck_array_ops.count, @@ -959,8 +1135,15 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1017,8 +1200,15 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1081,12 +1271,21 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) Use ``skipna`` to control whether NaNs are ignored. 
>>> da.max(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.max, @@ -1150,12 +1349,21 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.min, @@ -1223,12 +1431,21 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1303,16 +1520,27 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) + + array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1388,16 +1616,27 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) + + array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1470,16 +1709,27 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) + + array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1552,16 +1802,27 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) + + array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1630,12 +1891,21 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.median, @@ -1709,8 +1979,21 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -1769,8 +2052,21 @@ def all( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True """ return self.reduce( duck_array_ops.array_all, @@ -1829,8 +2125,21 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -1895,12 +2204,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 """ return self.reduce( duck_array_ops.max, @@ -1966,12 +2294,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 """ return self.reduce( duck_array_ops.min, @@ -2041,12 +2388,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.mean, @@ -2123,16 +2489,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 """ return self.reduce( duck_array_ops.prod, @@ -2210,16 +2601,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 """ return self.reduce( duck_array_ops.sum, @@ -2294,16 +2710,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, @@ -2378,16 +2819,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, @@ -2458,12 +2924,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -2538,8 +3023,21 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").count() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 1 """ return self.reduce( duck_array_ops.count, @@ -2598,8 +3096,21 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").all() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False """ return self.reduce( duck_array_ops.array_all, @@ -2658,8 +3169,21 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").any() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -2724,12 +3248,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").max() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 nan """ return self.reduce( duck_array_ops.max, @@ -2795,12 +3338,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").min() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").min(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 nan """ return self.reduce( duck_array_ops.min, @@ -2870,12 +3432,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").mean() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.resample(time="3M").mean(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.mean, @@ -2952,16 +3533,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").prod() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.prod, @@ -3039,16 +3645,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").sum() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.sum, @@ -3123,16 +3754,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").std() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, @@ -3207,16 +3863,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").var() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, @@ -3287,12 +3968,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -3366,8 +4066,17 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() + + array([1, 2, 2]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.count, @@ -3424,8 +4133,17 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_all, @@ -3482,8 +4200,17 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_any, @@ -3546,12 +4273,25 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.max, @@ -3615,12 +4355,25 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + + array([1., 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.min, @@ -3688,12 +4441,25 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.mean, @@ -3768,16 +4534,33 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.prod, @@ -3853,16 +4636,33 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + + array([1., 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.sum, @@ -3935,16 +4735,33 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, @@ -4017,16 +4834,33 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. 
>>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, @@ -4095,12 +4929,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -4173,8 +5020,17 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").count() + + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -4231,8 +5087,17 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").all() + + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -4289,8 +5154,17 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").any() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -4353,12 +5227,25 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").max() + + array([1., 3., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").max(skipna=False) + + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -4422,12 +5309,25 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").min() + + array([1., 1., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) + + array([ 1., 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -4495,12 +5395,25 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").mean() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3M").mean(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -4575,16 +5488,33 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").prod() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").prod(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -4660,16 +5590,33 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").sum() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").sum(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -4742,16 +5689,33 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").std() + + array([0. , 0.81649658, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) + + array([0. , 0.81649658, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -4824,16 +5788,33 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").var() + + array([0. , 0.66666667, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) + + array([0. , 0.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -4902,12 +5883,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index ea479ccc9c3..4981efcc7e7 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -5,7 +5,7 @@ Usage: python xarray/util/generate_reductions.py > xarray/core/_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true - pytest --doctest-modules xarray/core/_reductions.py --accept + pytest --doctest-modules xarray/core/_reductions.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). The second run of pytest is deliberate, since the first will return an error @@ -25,8 +25,8 @@ from . import duck_array_ops if TYPE_CHECKING: - from .dataset import Dataset - from .dataarray import DataArray''' + from .dataarray import DataArray + from .dataset import Dataset''' CLASS_PREAMBLE = """ From 2bbddafaacf46f65c950738a9db3cbaddb198763 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 20:12:31 +0100 Subject: [PATCH 079/138] make reduce args consistent --- xarray/core/_reductions.py | 1000 +--------------------------- xarray/core/dataarray.py | 1 + xarray/core/dataset.py | 3 +- xarray/core/groupby.py | 46 +- xarray/core/resample.py | 20 +- xarray/util/generate_reductions.py | 1 + 6 files changed, 64 insertions(+), 1007 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 6b32c60fbaf..240cc655297 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -17,6 +17,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -73,19 +74,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.count() - - Dimensions: () - Data variables: - da int64 5 """ return self.reduce( duck_array_ops.count, @@ -144,19 +134,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.all() - - Dimensions: () - Data variables: - da bool False """ return self.reduce( duck_array_ops.array_all, @@ -215,19 +194,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.any() - - Dimensions: () - Data variables: - da bool True """ return self.reduce( duck_array_ops.array_any, @@ -292,27 +260,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.max() - - Dimensions: () - Data variables: - da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.max(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.max, @@ -378,27 +331,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.min() - - Dimensions: () - Data variables: - da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.min, @@ -468,27 +406,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.mean() - - Dimensions: () - Data variables: - da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -565,35 +488,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.prod() - - Dimensions: () - Data variables: - da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -671,35 +575,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.sum() - - Dimensions: () - Data variables: - da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) - - Dimensions: () - Data variables: - da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -774,35 +659,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.std() - - Dimensions: () - Data variables: - da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -877,35 +743,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.var() - - Dimensions: () - Data variables: - da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) - - Dimensions: () - Data variables: - da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) - - Dimensions: () - Data variables: - da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -976,27 +823,12 @@ def median( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.median() - - Dimensions: () - Data variables: - da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) - - Dimensions: () - Data variables: - da float64 nan """ return self.reduce( duck_array_ops.median, @@ -1015,6 +847,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -1070,15 +903,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.count() - - array(5) """ return self.reduce( duck_array_ops.count, @@ -1135,15 +961,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.all() - - array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1200,15 +1019,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.any() - - array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1271,21 +1083,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.max() - - array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.max, @@ -1349,21 +1152,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.min() - - array(1.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.min, @@ -1431,21 +1225,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.mean() - - array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1520,27 +1305,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.prod() - - array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) - - array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1616,27 +1390,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.sum() - - array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) - - array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) - - array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1709,27 +1472,16 @@ def std( ... ), ... 
) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.std() - - array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) - - array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1802,27 +1554,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.var() - - array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) - - array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) - - array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1891,21 +1632,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.median() - - array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) - - array(nan) """ return self.reduce( duck_array_ops.median, @@ -1923,6 +1655,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -1979,21 +1712,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").count() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) int64 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -2052,21 +1772,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").all() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool False True True """ return self.reduce( duck_array_ops.array_all, @@ -2125,21 +1832,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").any() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -2204,31 +1898,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").max() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 3.0 """ return self.reduce( duck_array_ops.max, @@ -2294,31 +1969,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.groupby("labels").min() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 1.0 """ return self.reduce( duck_array_ops.min, @@ -2388,31 +2044,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").mean() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.mean, @@ -2489,41 +2126,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").prod() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").prod(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 3.0 """ return self.reduce( duck_array_ops.prod, @@ -2601,41 +2213,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").sum() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 4.0 4.0 """ return self.reduce( duck_array_ops.sum, @@ -2710,41 +2297,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").std() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. 
>>> ds.groupby("labels").std(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, @@ -2819,41 +2381,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").var() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, @@ -2924,31 +2461,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.groupby("labels").median() - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").median(skipna=False) - - Dimensions: (labels: 3) - Coordinates: - * labels (labels) object 'a' 'b' 'c' - Data variables: - da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -2967,6 +2485,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -3023,21 +2542,8 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").count() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) int64 1 3 1 """ return self.reduce( duck_array_ops.count, @@ -3096,21 +2602,8 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").all() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True False """ return self.reduce( duck_array_ops.array_all, @@ -3169,21 +2662,8 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -3248,31 +2728,12 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> ds.resample(time="3M").max() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 3.0 nan """ return self.reduce( duck_array_ops.max, @@ -3338,31 +2799,12 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").min() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").min(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 1.0 nan """ return self.reduce( duck_array_ops.min, @@ -3432,31 +2874,12 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").mean() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").mean(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.mean, @@ -3533,41 +2956,16 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").prod() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.prod, @@ -3645,41 +3043,16 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").sum() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.resample(time="3M").sum(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=True, min_count=2) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.sum, @@ -3754,41 +3127,16 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").std() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, @@ -3863,41 +3211,16 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").var() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").var(skipna=True, ddof=1) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, @@ -3968,31 +3291,12 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds - - Dimensions: (time: 6) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> ds.resample(time="3M").median() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -4011,6 +3315,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -4066,17 +3371,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.groupby("labels").count() - - array([1, 2, 2]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.count, @@ -4133,17 +3429,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").all() - - array([False, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_all, @@ -4200,17 +3487,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").any() - - array([ True, True, True]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_any, @@ -4273,25 +3551,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").max() - - array([1., 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) - - array([nan, 2., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.max, @@ -4355,25 +3620,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").min() - - array([1., 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").min(skipna=False) - - array([nan, 2., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.min, @@ -4441,25 +3693,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").mean() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.mean, @@ -4534,33 +3773,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").prod() - - array([1., 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) - - array([nan, 4., 3.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.prod, @@ -4636,33 +3858,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").sum() - - array([1., 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").sum(skipna=False) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) - - array([nan, 4., 4.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.sum, @@ -4735,33 +3940,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").std() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) - - array([ nan, 0. , 1.41421356]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, @@ -4834,33 +4022,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").var() - - array([0., 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) - - array([nan, 0., 1.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").var(skipna=True, ddof=1) - - array([nan, 0., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, @@ -4929,25 +4100,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) - - array([nan, 2., 2.]) - Coordinates: - * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -4965,6 +4123,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, @@ -5020,17 +4179,8 @@ def count( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").count() - - array([1, 3, 1]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -5087,17 +4237,8 @@ def all( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").all() - - array([ True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -5154,17 +4295,8 @@ def any( ... ), ... ) >>> da - - array([ True, True, True, True, True, False]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 - labels (time) >> da.resample(time="3M").any() - - array([ True, True, True]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -5227,25 +4359,12 @@ def max( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").max() - - array([1., 3., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").max(skipna=False) - - array([ 1., 3., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -5309,25 +4428,12 @@ def min( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").min() - - array([1., 1., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) - - array([ 1., 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -5395,25 +4501,12 @@ def mean( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").mean() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").mean(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -5488,33 +4581,16 @@ def prod( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").prod() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").prod(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -5590,33 +4666,16 @@ def sum( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").sum() - - array([1., 6., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) - - array([ 1., 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. 
>>> da.resample(time="3M").sum(skipna=True, min_count=2) - - array([nan, 6., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -5689,33 +4748,16 @@ def std( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").std() - - array([0. , 0.81649658, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) - - array([0. , 0.81649658, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -5788,33 +4830,16 @@ def var( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").var() - - array([0. , 0.66666667, 0. ]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) - - array([0. , 0.66666667, nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").var(skipna=True, ddof=1) - - array([nan, 1., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -5883,25 +4908,12 @@ def median( ... ), ... ) >>> da - - array([ 1., 2., 3., 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 - labels (time) >> da.resample(time="3M").median() - - array([1., 2., 2.]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) - - array([ 1., 2., nan]) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ea03ac76f37..1b96f22b744 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2655,6 +2655,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e3339f2562f..cf52fed6974 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5000,6 +5000,7 @@ def reduce( self, func: Callable, dim: Union[Hashable, Iterable[Hashable]] = None, + *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, @@ -5035,7 +5036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if "axis" in kwargs: + if kwargs.get("axis", None) is not None: raise ValueError( "passing 'axis' to Dataset reduce methods is ambiguous." " Please use 'dim' instead." 
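The guard change in Dataset.reduce above matters because, with the new keyword-only signatures, the generated reduction wrappers now forward axis=None explicitly; a plain membership test on kwargs would then raise even though no axis was actually requested. A minimal sketch of the behavioural difference follows (the helper names _old_guard and _new_guard are illustrative only, not part of xarray):

    def _old_guard(**kwargs):
        # Old check: raises as soon as the key is present, even for axis=None.
        if "axis" in kwargs:
            raise ValueError("passing 'axis' to Dataset reduce methods is ambiguous.")

    def _new_guard(**kwargs):
        # New check: raises only when a concrete axis value was supplied.
        if kwargs.get("axis", None) is not None:
            raise ValueError("passing 'axis' to Dataset reduce methods is ambiguous.")

    _new_guard(axis=None)   # accepted: treated the same as not passing axis at all
    _new_guard(dim="time")  # accepted
    # _old_guard(axis=None) would raise, breaking wrappers that always pass axis=
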
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 185b4ae5bec..bf5cded33ee 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Any, Callable, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -834,7 +835,15 @@ def _combine(self, applied, shortcut=False): return combined def reduce( - self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + *, + shortcut: bool = True, + **kwargs: Any, ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -867,11 +876,15 @@ def reduce( if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_array(ar): - return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) @@ -949,7 +962,16 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -981,11 +1003,15 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_dataset(ds): - return ds.reduce(func, dim, keep_attrs, **kwargs) + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index ad11550372c..f1776cf3078 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,4 +1,5 @@ import warnings +from typing import Any, Callable, Hashable, Sequence, Union from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -316,7 +317,15 @@ def apply(self, func, args=(), shortcut=None, **kwargs): ) return self.map(func=func, shortcut=shortcut, args=args, **kwargs) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. @@ -341,4 +350,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. 
""" - return super().reduce(func, dim, keep_attrs, **kwargs) + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 4981efcc7e7..fbf2d82dd2c 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -38,6 +38,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, From 3d854e52055de0f53f4ba16b0713ac581611ef94 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 20:21:26 +0100 Subject: [PATCH 080/138] more reduce edits --- xarray/core/common.py | 2 +- xarray/core/groupby.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index b5dc3bf0e20..e2a5e0926a8 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -65,7 +65,7 @@ def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func=func, dim=dim, axis=axis, **kwargs) return wrapped_func diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index bf5cded33ee..df2adaad260 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -838,10 +838,10 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, - *, shortcut: bool = True, **kwargs: Any, ): From be33560a14ac4b9379e5d0ff4f340cfbd6d552f1 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 20:28:39 +0100 Subject: [PATCH 081/138] one more reduce --- xarray/core/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/common.py b/xarray/core/common.py index e2a5e0926a8..7e6bbc8b05f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -60,7 +60,9 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce( + func=func, dim=dim, axis=axis, skipna=skipna, **kwargs + ) else: From 0f94bec2953aa3e7eadd0c4efc25cd6111b7e663 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 20:48:27 +0100 Subject: [PATCH 082/138] another reduce --- xarray/core/resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index f1776cf3078..ed665ad4048 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -321,6 +321,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, From 19d82cddf0a4ac30811803de9f7e70a881d52ea0 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat, 20 Nov 2021 22:00:29 +0100 Subject: [PATCH 083/138] more reduce --- xarray/core/common.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff 
--git a/xarray/core/common.py b/xarray/core/common.py index 7e6bbc8b05f..2300f3dd8f5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -100,13 +100,19 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs + func=func, + dim=dim, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) + return self.reduce( + func=func, dim=dim, numeric_only=numeric_only, **kwargs + ) return wrapped_func From cd8a898d5b003ea28ec8f3feacb56d76b6dc1096 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 20 Nov 2021 14:37:17 -0700 Subject: [PATCH 084/138] add doctests --- xarray/core/_reductions.py | 994 +++++++++++++++++++++++++++++++++++++ 1 file changed, 994 insertions(+) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 240cc655297..5389aa4e09d 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -74,8 +74,19 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + + Dimensions: () + Data variables: + da int64 5 """ return self.reduce( duck_array_ops.count, @@ -134,8 +145,19 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False """ return self.reduce( duck_array_ops.array_all, @@ -194,8 +216,19 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True """ return self.reduce( duck_array_ops.array_any, @@ -260,12 +293,27 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.max, @@ -331,12 +379,27 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + + Dimensions: () + Data variables: + da float64 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.min(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.min, @@ -406,12 +469,27 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + + Dimensions: () + Data variables: + da float64 1.8 Use ``skipna`` to control whether NaNs are ignored. >>> ds.mean(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.mean, @@ -488,16 +566,35 @@ def prod( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + + Dimensions: () + Data variables: + da float64 12.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.prod(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.prod(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 12.0 """ return self.reduce( duck_array_ops.prod, @@ -575,16 +672,35 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 """ return self.reduce( duck_array_ops.sum, @@ -659,16 +775,35 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 Use ``skipna`` to control whether NaNs are ignored. >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 """ return self.reduce( duck_array_ops.std, @@ -743,16 +878,35 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 Use ``skipna`` to control whether NaNs are ignored. >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 """ return self.reduce( duck_array_ops.var, @@ -823,12 +977,27 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan """ return self.reduce( duck_array_ops.median, @@ -903,8 +1072,15 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + + array(5) """ return self.reduce( duck_array_ops.count, @@ -961,8 +1137,15 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) """ return self.reduce( duck_array_ops.array_all, @@ -1019,8 +1202,15 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.any() + + array(True) """ return self.reduce( duck_array_ops.array_any, @@ -1083,12 +1273,21 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) Use ``skipna`` to control whether NaNs are ignored. >>> da.max(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.max, @@ -1152,12 +1351,21 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) Use ``skipna`` to control whether NaNs are ignored. >>> da.min(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.min, @@ -1225,12 +1433,21 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) Use ``skipna`` to control whether NaNs are ignored. >>> da.mean(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.mean, @@ -1305,16 +1522,27 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) Use ``skipna`` to control whether NaNs are ignored. >>> da.prod(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.prod(skipna=True, min_count=2) + + array(12.) """ return self.reduce( duck_array_ops.prod, @@ -1390,16 +1618,27 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) Use ``skipna`` to control whether NaNs are ignored. >>> da.sum(skipna=False) + + array(nan) Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.sum(skipna=True, min_count=2) + + array(9.) """ return self.reduce( duck_array_ops.sum, @@ -1472,16 +1711,27 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) Use ``skipna`` to control whether NaNs are ignored. >>> da.std(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.std(skipna=True, ddof=1) + + array(0.83666003) """ return self.reduce( duck_array_ops.std, @@ -1554,16 +1804,27 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) Use ``skipna`` to control whether NaNs are ignored. >>> da.var(skipna=False) + + array(nan) Specify ``ddof=1`` for an unbiased estimate. >>> da.var(skipna=True, ddof=1) + + array(0.7) """ return self.reduce( duck_array_ops.var, @@ -1632,12 +1893,21 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) Use ``skipna`` to control whether NaNs are ignored. >>> da.median(skipna=False) + + array(nan) """ return self.reduce( duck_array_ops.median, @@ -1712,8 +1982,21 @@ def count( ... 
) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -1772,8 +2055,21 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool False True True """ return self.reduce( duck_array_ops.array_all, @@ -1832,8 +2128,21 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -1898,12 +2207,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 3.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 """ return self.reduce( duck_array_ops.max, @@ -1969,12 +2297,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 """ return self.reduce( duck_array_ops.min, @@ -2044,12 +2391,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.mean, @@ -2126,16 +2492,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 3.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 """ return self.reduce( duck_array_ops.prod, @@ -2213,16 +2604,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 4.0 4.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").sum(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.groupby("labels").sum(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 """ return self.reduce( duck_array_ops.sum, @@ -2297,16 +2713,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, @@ -2381,16 +2822,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 0.0 0.0 1.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 Specify ``ddof=1`` for an unbiased estimate. >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, @@ -2461,12 +2927,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 """ return self.reduce( duck_array_ops.median, @@ -2542,8 +3027,21 @@ def count( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").count() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 1 """ return self.reduce( duck_array_ops.count, @@ -2602,8 +3100,21 @@ def all( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").all() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False """ return self.reduce( duck_array_ops.array_all, @@ -2662,8 +3173,21 @@ def any( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").any() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -2728,12 +3252,31 @@ def max( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").max() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").max(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 3.0 nan """ return self.reduce( duck_array_ops.max, @@ -2799,12 +3342,31 @@ def min( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").min() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").min(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 nan """ return self.reduce( duck_array_ops.min, @@ -2874,12 +3436,31 @@ def mean( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").mean() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. 
>>> ds.resample(time="3M").mean(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.mean, @@ -2956,16 +3537,41 @@ def prod( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").prod() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").prod(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.prod, @@ -3043,16 +3649,41 @@ def sum( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").sum() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 6.0 nan Specify ``min_count`` for finer control over when NaNs are ignored. >>> ds.resample(time="3M").sum(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan """ return self.reduce( duck_array_ops.sum, @@ -3127,16 +3758,41 @@ def std( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").std() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.8165 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, @@ -3211,16 +3867,41 @@ def var( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").var() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 0.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.6667 nan Specify ``ddof=1`` for an unbiased estimate. >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, @@ -3291,12 +3972,31 @@ def median( ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 2.0 Use ``skipna`` to control whether NaNs are ignored. >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 2.0 nan """ return self.reduce( duck_array_ops.median, @@ -3371,8 +4071,17 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() + + array([1, 2, 2]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.count, @@ -3429,8 +4138,17 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() + + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_all, @@ -3487,8 +4205,17 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.array_any, @@ -3551,12 +4278,25 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() + + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.max, @@ -3620,12 +4360,25 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() + + array([1., 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. 
>>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.min, @@ -3693,12 +4446,25 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.mean, @@ -3773,16 +4539,33 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() + + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").prod(skipna=True, min_count=2) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.prod, @@ -3858,16 +4641,33 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() + + array([1., 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.groupby("labels").sum(skipna=True, min_count=2) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.sum, @@ -3940,16 +4740,33 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, @@ -4022,16 +4839,33 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Specify ``ddof=1`` for an unbiased estimate. 
>>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, @@ -4100,12 +4934,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' Use ``skipna`` to control whether NaNs are ignored. >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.median, @@ -4179,8 +5026,17 @@ def count( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").count() + + array([1, 3, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.count, @@ -4237,8 +5093,17 @@ def all( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").all() + + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_all, @@ -4295,8 +5160,17 @@ def any( ... ), ... ) >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").any() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.array_any, @@ -4359,12 +5233,25 @@ def max( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").max() + + array([1., 3., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").max(skipna=False) + + array([ 1., 3., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.max, @@ -4428,12 +5315,25 @@ def min( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").min() + + array([1., 1., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").min(skipna=False) + + array([ 1., 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.min, @@ -4501,12 +5401,25 @@ def mean( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").mean() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. 
>>> da.resample(time="3M").mean(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.mean, @@ -4581,16 +5494,33 @@ def prod( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").prod() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").prod(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").prod(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.prod, @@ -4666,16 +5596,33 @@ def sum( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").sum() + + array([1., 6., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").sum(skipna=False) + + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``min_count`` for finer control over when NaNs are ignored. >>> da.resample(time="3M").sum(skipna=True, min_count=2) + + array([nan, 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.sum, @@ -4748,16 +5695,33 @@ def std( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").std() + + array([0. , 0.81649658, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").std(skipna=False) + + array([0. , 0.81649658, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, @@ -4830,16 +5794,33 @@ def var( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").var() + + array([0. , 0.66666667, 0. ]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").var(skipna=False) + + array([0. , 0.66666667, nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Specify ``ddof=1`` for an unbiased estimate. >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, @@ -4908,12 +5889,25 @@ def median( ... ), ... ) >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> da.resample(time="3M").median() + + array([1., 2., 2.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Use ``skipna`` to control whether NaNs are ignored. >>> da.resample(time="3M").median(skipna=False) + + array([ 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.median, From 4f378a3fcbcda32d62c401f6f51065381b07074d Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 22 Nov 2021 11:09:04 -0700 Subject: [PATCH 085/138] Bugfix DataArray resampling. --- xarray/core/_reductions.py | 293 ++++++++++++++++++++++------- xarray/util/generate_reductions.py | 2 +- 2 files changed, 223 insertions(+), 72 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index db403f71cac..ef27413fb5b 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -5520,12 +5520,26 @@ def count( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.count, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="count", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self: DataArrayGroupByReduce, @@ -5587,12 +5601,26 @@ def all( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="all", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self: DataArrayGroupByReduce, @@ -5654,12 +5682,26 @@ def any( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="any", + dim=dim, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self: DataArrayGroupByReduce, @@ -5735,13 +5777,28 @@ def max( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="max", + dim=dim, + skipna=skipna, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self: DataArrayGroupByReduce, @@ -5817,13 +5874,28 @@ def min( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and 
OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="min", + dim=dim, + skipna=skipna, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self: DataArrayGroupByReduce, @@ -5903,13 +5975,28 @@ def mean( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="mean", + dim=dim, + skipna=skipna, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self: DataArrayGroupByReduce, @@ -6004,14 +6091,30 @@ def prod( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="prod", + dim=dim, + skipna=skipna, + min_count=min_count, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self: DataArrayGroupByReduce, @@ -6106,14 +6209,30 @@ def sum( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="sum", + dim=dim, + skipna=skipna, + min_count=min_count, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self: DataArrayGroupByReduce, @@ -6205,14 +6324,30 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="std", + dim=dim, + skipna=skipna, + ddof=ddof, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self: DataArrayGroupByReduce, @@ -6304,14 +6439,30 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - keep_attrs=keep_attrs, - **kwargs, - ) + + if ( + flox + and OPTIONS["use_numpy_groupies"] + and contains_only_dask_or_numpy(self._obj) + ): + return self._flox_reduce( + func="var", + dim=dim, + skipna=skipna, + ddof=ddof, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + 
) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self: DataArrayGroupByReduce, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index ab4f3acea7f..c25c0605a11 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -442,7 +442,7 @@ class DataStructure: example_call_preamble='.groupby("labels")', self_type="DataArrayGroupByReduce", ) -DataArrayResampleGenerator = GenericReductionGenerator( +DataArrayResampleGenerator = GroupByReductionGenerator( cls="Resample", datastructure=DataArrayObject, methods=REDUCTION_METHODS, From 6916fa7debfe4ca5c5ce9796fe5fe3243d6c4d2a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 22 Nov 2021 11:16:43 -0700 Subject: [PATCH 086/138] Update xarray/util/generate_reductions.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/util/generate_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index fbf2d82dd2c..3a7f35ab567 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -44,7 +44,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - return NotImplemented""" + raise NotImplementedError()""" TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( From af03ca45ed4c54b867bacf4359df565b3878c220 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 25 Nov 2021 20:52:03 -0700 Subject: [PATCH 087/138] Small improvement to resampling --- xarray/core/groupby.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 80b6360c0dc..7f8f9802b59 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -536,6 +536,7 @@ def _maybe_unstack(self, obj): def _flox_reduce(self, dim, **kwargs): from flox.xarray import xarray_reduce + from .dataarray import DataArray from .dataset import Dataset # TODO: fix this @@ -555,19 +556,17 @@ def _flox_reduce(self, dim, **kwargs): # TODO: only do this for resample, not general groupers... # this creates a label DataArray since resample doesn't do that somehow if isinstance(self._group_indices[0], slice): - from .dataarray import DataArray - - tostack = [] - for idx, slicer in zip(self._unique_coord.data, self._group_indices): - if slicer.stop is None: - stop = self._obj.sizes[self._group_dim] - else: - stop = slicer.stop - tostack.append(np.full((stop - slicer.start,), fill_value=idx)) + repeats = [] + for slicer in self._group_indices: + stop = ( + slicer.stop + if slicer.stop is not None + else self._obj.sizes[self._group_dim] + ) + repeats.append(stop - slicer.start) + labels = np.repeat(self._unique_coord.data, repeats) group = DataArray( - np.hstack(tostack), - dims=(self._group_dim,), - name=self._unique_coord.name, + labels, dims=(self._group_dim,), name=self._unique_coord.name ) else: if isinstance(self._unstacked_group, _DummyGroup): @@ -603,7 +602,7 @@ def _flox_reduce(self, dim, **kwargs): if self._bins is not None: # bins provided to dask_groupby are at full precision - # the bin edge labels a default precision of 3 + # the bin edge labels have a default precision of 3 # reassign to fix that. 
new_coord = [ pd.Interval(inter.left, inter.right) for inter in self._full_index From cfd2c071cf8df984e5bc4c673abc84e17882d323 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 25 Nov 2021 21:02:44 -0700 Subject: [PATCH 088/138] minimize conflicts --- xarray/util/generate_reductions.py | 37 ++++++++++-------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index c25c0605a11..5d4aa2145e1 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -20,19 +20,16 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .options import OPTIONS from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset try: import flox @@ -41,7 +38,7 @@ OBJ_PREAMBLE = """ -class {obj}Reduce(Protocol): +class {obj}Reductions(): def reduce( self, func: Callable[..., Any], @@ -50,12 +47,12 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_{obj}: + ) -> "{obj}": ... -class {obj}GroupByReduce(Protocol): - _obj: T_{obj} +class {obj}GroupByReductions(): + _obj: "{obj}" def reduce( self, @@ -65,14 +62,14 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_{obj}: + ) -> "{obj}": ... def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, - ) -> T_{obj}: + ) -> "{obj}": ...""" @@ -83,11 +80,11 @@ class {obj}{cls}Reductions: TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( - self: {self_type}, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} keep_attrs: bool = None, **kwargs, - ) -> T_{obj}: + ) -> "{obj}": """ Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). 
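The resample fast path in the `_flox_reduce` change above has to build an explicit label array, because resample stores its groups as slices rather than as label values. A minimal standalone sketch of that conversion, with made-up slices, labels, and dimension size:

    import numpy as np

    # Inputs roughly as _flox_reduce sees them (values are illustrative only):
    group_indices = [slice(0, 2), slice(2, 5), slice(5, None)]  # one slice per group
    unique_labels = np.array(["2001-01-31", "2001-04-30", "2001-07-31"])
    dim_size = 6  # length of the resampled dimension

    # An open-ended final slice is closed with the dimension size, then each
    # label is repeated once per element of its slice.
    repeats = [
        (s.stop if s.stop is not None else dim_size) - s.start for s in group_indices
    ]
    labels = np.repeat(unique_labels, repeats)
    # labels: ['2001-01-31' '2001-01-31' '2001-04-30' '2001-04-30' '2001-04-30' '2001-07-31']

Wrapping ``labels`` in a DataArray along the resampled dimension is then enough for flox to group on.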
@@ -212,7 +209,6 @@ def __init__( self, cls, datastructure, - self_type, methods, docref, docref_description, @@ -220,7 +216,6 @@ def __init__( see_also_obj=None, ): self.datastructure = datastructure - self.self_type = self_type self.cls = cls self.methods = methods self.docref = docref @@ -420,7 +415,6 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", - self_type="DatasetReduce", ) DataArrayGenerator = GenericReductionGenerator( cls="", @@ -430,7 +424,6 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="Dataset", - self_type="DataArrayReduce", ) DataArrayGroupByGenerator = GroupByReductionGenerator( @@ -440,7 +433,6 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', - self_type="DataArrayGroupByReduce", ) DataArrayResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -449,7 +441,6 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', - self_type="DataArrayGroupByReduce", ) DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", @@ -458,7 +449,6 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', - self_type="DatasetGroupByReduce", ) DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -467,14 +457,11 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', - self_type="DatasetGroupByReduce", ) if __name__ == "__main__": print(MODULE_PREAMBLE) - print(OBJ_PREAMBLE.format(obj="Dataset")) - print(OBJ_PREAMBLE.format(obj="DataArray")) for gen in [ DatasetGenerator, DataArrayGenerator, From 3c51b1a3ef4f6ab285886c478070334284905353 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 25 Nov 2021 21:35:59 -0700 Subject: [PATCH 089/138] Squash merge #5950 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashed commit of the following: commit 6916fa7debfe4ca5c5ce9796fe5fe3243d6c4d2a Author: Deepak Cherian Date: Mon Nov 22 11:16:43 2021 -0700 Update xarray/util/generate_reductions.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> commit cd8a898d5b003ea28ec8f3feacb56d76b6dc1096 Author: dcherian Date: Sat Nov 20 14:37:17 2021 -0700 add doctests commit 19d82cddf0a4ac30811803de9f7e70a881d52ea0 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 22:00:29 2021 +0100 more reduce commit 0f94bec2953aa3e7eadd0c4efc25cd6111b7e663 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 20:48:27 2021 +0100 another reduce commit be33560a14ac4b9379e5d0ff4f340cfbd6d552f1 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 20:28:39 2021 +0100 one more reduce commit 3d854e52055de0f53f4ba16b0713ac581611ef94 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 20:21:26 2021 +0100 more reduce edits commit 2bbddafaacf46f65c950738a9db3cbaddb198763 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 20:12:31 2021 +0100 make reduce args consistent commit dfbe103c3425ba4d8aa91095ec5b7386fb785225 Merge: f03b67592 dd28a57f6 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 19:01:59 2021 
+0100 Merge branch 'generate-reductions-class' of https://github.com/dcherian/xarray into pr/5950 commit f03b67592cbff91172a50213cf0f9621062114cb Merge: 411d75d5c 7a201de64 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 19:01:42 2021 +0100 Merge branch 'main' into pr/5950 commit dd28a57f66188db47a05fad184519295d688213d Author: dcherian Date: Sat Nov 20 10:57:22 2021 -0700 updates commit 6a9a1240aa95d71ef2081f6e98152981f9db336d Author: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat Nov 20 17:02:07 2021 +0000 [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci commit 411d75d5ced18349968c060918e9bdcd4be04537 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 18:00:08 2021 +0100 Now get normal code running as well Protocols are not needed anymore when subclassing/defining directly in the class. When adding a dummy method in DatasetResampleReductions the order of subclassing had to be changed so the correct reduce was used. commit 5dcb5bfebe04302beb732259f3e805bd31691ed8 Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sat Nov 20 12:30:50 2021 +0100 Attempt fixing typing errors Mixing in DatasetReduce fixes: xarray/tests/test_groupby.py:460: error: Invalid self argument "Dataset" to attribute function "mean" with type "Callable[[DatasetReduce, Optional[Hashable], Optional[bool], Optional[bool], KwArg(Any)], T_Dataset]" [misc] Switching to "Dateset" as returned type fixes: xarray/tests/test_groupby.py:77: error: Need type annotation for "expected" [var-annotated] commit 7a201de643515aec9a0c88dc78253499528fa99f Author: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri Nov 19 11:37:20 2021 -0700 [pre-commit.ci] pre-commit autoupdate (#5990) commit 95394d5bcbd7d73bae34c091a080c42bcfc9f07d Author: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon Nov 15 21:40:37 2021 +0100 Use set_options for asv bottleneck tests (#5986) * Use set_options for bottleneck tests * Use set_options in rolling * Update rolling.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update rolling.py * Update rolling.py * set_options not needed. Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> commit b2d7cd8837ea9b3e7e0eb0390479a1986f62d4b4 Author: Kai Mühlbauer Date: Mon Nov 15 18:33:43 2021 +0100 Fix module name retrieval in `backend.plugins.remove_duplicates()`, plugin tests (#5959) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> commit c7e9d9647f2c7df7e5b14644926dc2126ad4318f Author: dcherian Date: Wed Nov 10 11:49:47 2021 -0700 Minor improvement commit dea8fd9f326a543807c26b6a62e84b28b5cb4cc3 Author: dcherian Date: Mon Nov 8 16:18:07 2021 -0700 REfactor commit 9bb2c321e8df2f5978c40a1c3c8f891c77e847ff Author: dcherian Date: Mon Nov 8 13:56:53 2021 -0700 Reorder docstring to match numpy commit 99bfe128066ec3ef1b297650a47e2dd0a45801a8 Author: dcherian Date: Mon Nov 8 12:44:23 2021 -0700 Fixes #5898 commit 7f39cc0d8c664e3fcf354536ed3a95882064b4b6 Author: dcherian Date: Mon Nov 8 12:39:00 2021 -0700 Minor docstring improvements. 
commit a04ed824a55b757937a4db4aa65729dccf62c1a7 Author: dcherian Date: Mon Nov 8 12:35:48 2021 -0700 Small changes commit 816e7941e47b14280103d5d10da94139f394c0cd Author: dcherian Date: Sun Nov 7 20:56:37 2021 -0700 Generate DataArray, Dataset reductions too. commit 569c67f28b3e7ff4c475793325a4388220932d02 Author: dcherian Date: Sun Nov 7 20:54:42 2021 -0700 Add ddof for var, std commit 6b9a81a6fbe3ba460d905f0e92105d8e25af3ebb Author: dcherian Date: Sun Nov 7 20:35:52 2021 -0700 Better generator for reductions. --- .pre-commit-config.yaml | 8 +- asv_bench/asv.conf.json | 2 +- asv_bench/benchmarks/dataarray_missing.py | 8 - asv_bench/benchmarks/rolling.py | 92 +++-- doc/user-guide/computation.rst | 2 + doc/whats-new.rst | 6 + xarray/backends/plugins.py | 10 +- xarray/core/_reductions.py | 440 ++++++++++++---------- xarray/core/arithmetic.py | 2 - xarray/core/common.py | 16 +- xarray/core/dataarray.py | 6 +- xarray/core/dataset.py | 6 +- xarray/core/groupby.py | 46 ++- xarray/core/resample.py | 25 +- xarray/tests/test_duck_array_ops.py | 81 ---- xarray/tests/test_plugins.py | 54 +-- xarray/util/generate_reductions.py | 35 +- 17 files changed, 438 insertions(+), 401 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8c9b61a7364..6db093b2dbb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,12 +8,12 @@ repos: - id: check-yaml # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/PyCQA/isort - rev: 5.9.3 + rev: 5.10.1 hooks: - id: isort # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 21.9b0 + rev: 21.10b0 hooks: - id: black - id: black-jupyter @@ -22,8 +22,8 @@ repos: hooks: - id: blackdoc exclude: "generate_reductions.py" - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + - repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 0d9ce0d51a3..26738e2d357 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -62,7 +62,7 @@ "pandas": [""], "netcdf4": [""], "scipy": [""], - "bottleneck": ["", null], + "bottleneck": [""], "dask": [""], "distributed": [""], "flox": [""], diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py index f89fe7f8eb9..d786c04e852 100644 --- a/asv_bench/benchmarks/dataarray_missing.py +++ b/asv_bench/benchmarks/dataarray_missing.py @@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks): return da -def requires_bottleneck(): - try: - import bottleneck # noqa: F401 - except ImportError: - raise NotImplementedError() - - class DataArrayMissingInterpolateNA: def setup(self, shape, chunks, limit): if chunks is not None: @@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit): class DataArrayMissingBottleneck: def setup(self, shape, chunks, limit): - requires_bottleneck() if chunks is not None: requires_dask() self.da = make_bench_data(shape, 0.1, chunks) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index f0e18bf2153..1d3713f19bf 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -36,29 +36,45 @@ def setup(self, *args, **kwargs): randn_long, dims="x", coords={"x": np.arange(long_nx) * 0.1} ) - @parameterized(["func", "center"], (["mean", "count"], [True, False])) - def time_rolling(self, func, center): - getattr(self.ds.rolling(x=window, 
center=center), func)().load() - - @parameterized(["func", "pandas"], (["mean", "count"], [True, False])) - def time_rolling_long(self, func, pandas): + @parameterized( + ["func", "center", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling(self, func, center, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + getattr(self.ds.rolling(x=window, center=center), func)().load() + + @parameterized( + ["func", "pandas", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling_long(self, func, pandas, use_bottleneck): if pandas: se = self.da_long.to_series() getattr(se.rolling(window=window, min_periods=window), func)() else: - getattr(self.da_long.rolling(x=window, min_periods=window), func)().load() - - @parameterized(["window_", "min_periods"], ([20, 40], [5, 5])) - def time_rolling_np(self, window_, min_periods): - self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( - getattr(np, "nansum") - ).load() - - @parameterized(["center", "stride"], ([True, False], [1, 1])) - def time_rolling_construct(self, center, stride): - self.ds.rolling(x=window, center=center).construct( - "window_dim", stride=stride - ).sum(dim="window_dim").load() + with xr.set_options(use_bottleneck=use_bottleneck): + getattr( + self.da_long.rolling(x=window, min_periods=window), func + )().load() + + @parameterized( + ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False]) + ) + def time_rolling_np(self, window_, min_periods, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( + getattr(np, "nansum") + ).load() + + @parameterized( + ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False]) + ) + def time_rolling_construct(self, center, stride, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window, center=center).construct( + "window_dim", stride=stride + ).sum(dim="window_dim").load() class RollingDask(Rolling): @@ -87,24 +103,28 @@ def setup(self, *args, **kwargs): class DataArrayRollingMemory(RollingMemory): - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_ndrolling_reduce(self, func): - roll = self.ds.var1.rolling(x=10, y=4) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var1.rolling(x=10, y=4) + getattr(roll, func)() - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_1drolling_reduce(self, func): - roll = self.ds.var3.rolling(t=100) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var3.rolling(t=100) + getattr(roll, func)() class DatasetRollingMemory(RollingMemory): - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_ndrolling_reduce(self, func): - roll = self.ds.rolling(x=10, y=4) - getattr(roll, func)() - - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_1drolling_reduce(self, func): - roll = self.ds.rolling(t=100) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + 
with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(x=10, y=4) + getattr(roll, func)() + + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.rolling(t=100) + getattr(roll, func)() diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index fc3c457308f..a4ba606feeb 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -107,6 +107,8 @@ Xarray also provides the ``max_gap`` keyword argument to limit the interpolation data gaps of length ``max_gap`` or smaller. See :py:meth:`~xarray.DataArray.interpolate_na` for more. +.. _agg: + Aggregation =========== diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f499cbe3d21..b66c99d0bcb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,8 @@ Bug fixes ~~~~~~~~~ - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`). By `Sebastian Weigand `_. +- Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) + By `Kai Mühlbauer `_. Documentation ~~~~~~~~~~~~~ @@ -49,6 +51,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Use ``importlib`` to replace functionality of ``pkg_resources`` in + backend plugins tests. (:pull:`5959`). + By `Kai Mühlbauer `_. + .. _whats-new.0.20.1: diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py index 32013f1f298..0a9ffcbda22 100644 --- a/xarray/backends/plugins.py +++ b/xarray/backends/plugins.py @@ -23,15 +23,17 @@ def remove_duplicates(entrypoints): # check if there are multiple entrypoints for the same name unique_entrypoints = [] for name, matches in entrypoints_grouped: - matches = list(matches) + # remove equal entrypoints + matches = list(set(matches)) unique_entrypoints.append(matches[0]) matches_len = len(matches) if matches_len > 1: - selected_module_name = matches[0].module_name - all_module_names = [e.module_name for e in matches] + all_module_names = [e.value.split(":")[0] for e in matches] + selected_module_name = all_module_names[0] warnings.warn( f"Found {matches_len} entrypoints for the engine name {name}:" - f"\n {all_module_names}.\n It will be used: {selected_module_name}.", + f"\n {all_module_names}.\n " + f"The entrypoint {selected_module_name} will be used.", RuntimeWarning, ) return unique_entrypoints diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index ef27413fb5b..c56e76cf5d3 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,19 +1,15 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -import sys -from typing import Any, Callable, Hashable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . 
import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset try: import flox @@ -21,85 +17,27 @@ flox = None -class DatasetReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - -class DatasetGroupByReduce(Protocol): - _obj: T_Dataset - - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_Dataset: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_Dataset: - ... - - -class DataArrayReduce(Protocol): - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> T_DataArray: - ... - - -class DataArrayGroupByReduce(Protocol): - _obj: T_DataArray +class DatasetReductions: + __slots__ = () def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> T_DataArray: - ... - - -class DatasetReductions: - __slots__ = () + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -166,11 +104,11 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -237,11 +175,11 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -308,12 +246,12 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -394,12 +332,12 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). 
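These hunks replace the typing ``Protocol`` helpers with plain mixin base classes: each mixin declares ``reduce`` (and, for the groupby variants, ``_flox_reduce``) raising ``NotImplementedError``, and every generated method only ever calls those hooks, so the concrete Dataset/DataArray classes just have to supply working implementations. A much-reduced sketch of that shape, with illustrative names rather than the real xarray classes:

    from typing import Any, Callable


    class ExampleReductions:
        __slots__ = ()

        def reduce(self, func: Callable[..., Any], dim=None, **kwargs: Any):
            raise NotImplementedError()

        def total(self, dim=None, **kwargs):
            # stands in for a generated method; it relies only on self.reduce
            return self.reduce(sum, dim=dim, **kwargs)


    class ExampleContainer(ExampleReductions):
        def __init__(self, data):
            self.data = list(data)

        def reduce(self, func, dim=None, **kwargs):
            return func(self.data)


    print(ExampleContainer([1, 2, 3]).total())  # 6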
@@ -480,12 +418,12 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -570,13 +508,13 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -676,13 +614,13 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -782,13 +720,13 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -885,13 +823,13 @@ def std( ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -988,12 +926,12 @@ def var( ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -1081,12 +1019,24 @@ def median( class DataArrayReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -1147,11 +1097,11 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -1212,11 +1162,11 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -1277,12 +1227,12 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). 
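For context on how these generated methods get exercised: the flox branch is gated at runtime by the ``use_numpy_groupies`` option referenced via ``OPTIONS[...]`` above. Assuming that option is registered with ``set_options`` on this branch, usage would look like the following sketch, which reuses the docstring example data:

    import numpy as np
    import pandas as pd
    import xarray as xr

    da = xr.DataArray(
        np.array([1.0, 2.0, 3.0, 1.0, 2.0, np.nan]),
        dims="time",
        coords=dict(
            time=pd.date_range("2001-01-01", freq="M", periods=6),
            labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
        ),
    )
    ds = xr.Dataset(dict(da=da))

    # With the option on (and flox importable), groupby reductions take the
    # flox fast path; otherwise they fall back to the generic reduce().
    with xr.set_options(use_numpy_groupies=True):
        result = ds.groupby("labels").mean()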
@@ -1355,12 +1305,12 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -1433,12 +1383,12 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -1515,13 +1465,13 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -1611,13 +1561,13 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -1707,13 +1657,13 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -1800,13 +1750,13 @@ def std( ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -1893,12 +1843,12 @@ def var( ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -1976,14 +1926,33 @@ def median( class DatasetGroupByReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -2067,11 +2036,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). 
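Separately, this squashed commit carries the ``backends/plugins.py`` fix shown earlier: exact duplicate entrypoints are collapsed with ``set()`` before deciding whether to warn about a real name clash. A small self-contained illustration of that behaviour, using ``importlib.metadata`` entry points as the updated tests do (package and class names are made up):

    from importlib.metadata import EntryPoint
    from itertools import groupby

    eps = [
        EntryPoint("h5netcdf", "pkg_a.backend:Entrypoint", "xarray.backends"),
        EntryPoint("h5netcdf", "pkg_a.backend:Entrypoint", "xarray.backends"),  # exact duplicate
        EntryPoint("h5netcdf", "pkg_b.backend:Entrypoint", "xarray.backends"),  # genuine clash
    ]

    unique = []
    for name, matches in groupby(sorted(eps, key=lambda e: e.name), key=lambda e: e.name):
        matches = list(set(matches))  # drops the exact duplicate only
        unique.append(matches[0])
        if len(matches) > 1:
            modules = [e.value.split(":")[0] for e in matches]
            print(f"{len(matches)} entrypoints for {name!r}: {modules}")

Only the genuinely different module triggers the warning; identical re-registrations are now silent.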
@@ -2155,11 +2124,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -2243,12 +2212,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -2349,12 +2318,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -2455,12 +2424,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -2565,13 +2534,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -2694,13 +2663,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -2823,13 +2792,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -2949,13 +2918,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -3075,12 +3044,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). 
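Also in this squash, the asv rolling benchmarks now toggle bottleneck per run with ``xr.set_options(use_bottleneck=...)`` instead of building separate environments. The same switch works in user code; a tiny sketch of the pattern the benchmarks rely on:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.random.randn(1000), dims="x")

    with xr.set_options(use_bottleneck=False):  # force the pure-numpy rolling path
        no_bottleneck = da.rolling(x=10, center=True).mean()

    default = da.rolling(x=10, center=True).mean()  # bottleneck is used when installed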
@@ -3170,14 +3139,33 @@ def median( class DatasetResampleReductions: - __slots__ = () + _obj: "Dataset" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). @@ -3261,11 +3249,11 @@ def count( ) def all( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -3349,11 +3337,11 @@ def all( ) def any( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -3437,12 +3425,12 @@ def any( ) def max( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -3543,12 +3531,12 @@ def max( ) def min( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -3649,12 +3637,12 @@ def min( ) def mean( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). @@ -3759,13 +3747,13 @@ def mean( ) def prod( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -3888,13 +3876,13 @@ def prod( ) def sum( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -4017,13 +4005,13 @@ def sum( ) def std( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). 
@@ -4143,13 +4131,13 @@ def std( ) def var( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -4269,12 +4257,12 @@ def var( ) def median( - self: DatasetGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). @@ -4364,14 +4352,33 @@ def median( class DataArrayGroupByReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -4448,11 +4455,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -4529,11 +4536,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -4610,12 +4617,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -4707,12 +4714,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). @@ -4804,12 +4811,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -4905,13 +4912,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). 
@@ -5023,13 +5030,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -5141,13 +5148,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -5256,13 +5263,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -5371,12 +5378,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). @@ -5458,14 +5465,33 @@ def median( class DataArrayResampleReductions: - __slots__ = () + _obj: "DataArray" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). @@ -5542,11 +5568,11 @@ def count( ) def all( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). @@ -5623,11 +5649,11 @@ def all( ) def any( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). @@ -5704,12 +5730,12 @@ def any( ) def max( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). @@ -5801,12 +5827,12 @@ def max( ) def min( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). 
@@ -5898,12 +5924,12 @@ def min( ) def mean( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). @@ -5999,13 +6025,13 @@ def mean( ) def prod( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). @@ -6117,13 +6143,13 @@ def prod( ) def sum( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). @@ -6235,13 +6261,13 @@ def sum( ) def std( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). @@ -6350,13 +6376,13 @@ def std( ) def var( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). @@ -6465,12 +6491,12 @@ def var( ) def median( - self: DataArrayGroupByReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). 
diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 814e9a59877..bf8d6ccaeb6 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -105,7 +105,6 @@ class VariableArithmetic( class DatasetArithmetic( ImplementsDatasetReduce, - IncludeReduceMethods, IncludeCumMethods, SupportsArithmetic, DatasetOpsMixin, @@ -116,7 +115,6 @@ class DatasetArithmetic( class DataArrayArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, diff --git a/xarray/core/common.py b/xarray/core/common.py index b5dc3bf0e20..2300f3dd8f5 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -60,12 +60,14 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool if include_skipna: def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs): - return self.reduce(func, dim, axis, skipna=skipna, **kwargs) + return self.reduce( + func=func, dim=dim, axis=axis, skipna=skipna, **kwargs + ) else: def wrapped_func(self, dim=None, axis=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, axis, **kwargs) + return self.reduce(func=func, dim=dim, axis=axis, **kwargs) return wrapped_func @@ -98,13 +100,19 @@ def _reduce_method(cls, func: Callable, include_skipna: bool, numeric_only: bool def wrapped_func(self, dim=None, skipna=None, **kwargs): return self.reduce( - func, dim, skipna=skipna, numeric_only=numeric_only, **kwargs + func=func, + dim=dim, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, ) else: def wrapped_func(self, dim=None, **kwargs): # type: ignore[misc] - return self.reduce(func, dim, numeric_only=numeric_only, **kwargs) + return self.reduce( + func=func, dim=dim, numeric_only=numeric_only, **kwargs + ) return wrapped_func diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 24e5f5736b0..1b96f22b744 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -35,6 +35,7 @@ utils, weighted, ) +from ._reductions import DataArrayReductions from .accessor_dt import CombinedDatetimelikeAccessor from .accessor_str import StringAccessor from .alignment import ( @@ -215,7 +216,9 @@ def __setitem__(self, key, value) -> None: _THIS_ARRAY = ReprObject("") -class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): +class DataArray( + AbstractArray, DataWithCoords, DataArrayArithmetic, DataArrayReductions +): """N-dimensional array with labeled coordinates and dimensions. DataArray provides a wrapper around numpy ndarrays that uses @@ -2652,6 +2655,7 @@ def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e882495dce5..cf52fed6974 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -50,6 +50,7 @@ utils, weighted, ) +from ._reductions import DatasetReductions from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align from .arithmetic import DatasetArithmetic from .common import DataWithCoords, _contains_datetime_like_objects, get_chunksizes @@ -574,7 +575,7 @@ def __setitem__(self, key, value) -> None: self.dataset[pos_indexers] = value -class Dataset(DataWithCoords, DatasetArithmetic, Mapping): +class Dataset(DataWithCoords, DatasetReductions, DatasetArithmetic, Mapping): """A multi-dimensional, in memory, array database. 
A dataset resembles an in-memory representation of a NetCDF file, @@ -4999,6 +5000,7 @@ def reduce( self, func: Callable, dim: Union[Hashable, Iterable[Hashable]] = None, + *, keep_attrs: bool = None, keepdims: bool = False, numeric_only: bool = False, @@ -5034,7 +5036,7 @@ def reduce( Dataset with this object's DataArrays replaced with new DataArrays of summarized data and the indicated dimension(s) removed. """ - if "axis" in kwargs: + if kwargs.get("axis", None) is not None: raise ValueError( "passing 'axis' to Dataset reduce methods is ambiguous." " Please use 'dim' instead." diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7f8f9802b59..8c0bde3a4f9 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,6 @@ import datetime import warnings +from typing import Any, Callable, Hashable, Sequence, Union import numpy as np import pandas as pd @@ -932,7 +933,15 @@ def _combine(self, applied, shortcut=False): return combined def reduce( - self, func, dim=None, axis=None, keep_attrs=None, shortcut=True, **kwargs + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + shortcut: bool = True, + **kwargs: Any, ): """Reduce the items in this group by applying `func` along some dimension(s). @@ -965,11 +974,15 @@ def reduce( if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_array(ar): - return ar.reduce(func, dim, axis, keep_attrs=keep_attrs, **kwargs) + return ar.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) @@ -1047,7 +1060,16 @@ def _combine(self, applied): combined = self._maybe_unstack(combined) return combined - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along some dimension(s). 
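
The `reduce` signatures above gain a bare `*`, so everything after `dim` (including `axis`) becomes keyword-only; correspondingly, the wrappers now forward `axis=axis` even when it is None, which is why the guard in `Dataset.reduce` is relaxed from `if "axis" in kwargs` to `if kwargs.get("axis", None) is not None`. A minimal sketch of the keyword-only behaviour (a standalone toy function, not xarray's API):

def toy_reduce(func, dim=None, *, axis=None, keep_attrs=None, keepdims=False, **kwargs):
    # Everything after ``dim`` can only be passed by keyword.
    return func, dim, axis, keep_attrs, keepdims


toy_reduce(sum, "x", axis=0)   # OK: axis passed by keyword
# toy_reduce(sum, "x", 0)      # TypeError: takes from 1 to 2 positional arguments but 3 were given
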
@@ -1079,11 +1101,15 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): if dim is None: dim = self._group_dim - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - def reduce_dataset(ds): - return ds.reduce(func, dim, keep_attrs, **kwargs) + return ds.reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) check_reduce_dims(dim, self.dims) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index e2f599e8b4e..ed665ad4048 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,4 +1,5 @@ import warnings +from typing import Any, Callable, Hashable, Sequence, Union from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -157,7 +158,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -248,7 +249,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): @@ -316,7 +317,16 @@ def apply(self, func, args=(), shortcut=None, **kwargs): ) return self.map(func=func, shortcut=shortcut, args=args, **kwargs) - def reduce(self, func, dim=None, keep_attrs=None, **kwargs): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ): """Reduce the items in this group by applying `func` along the pre-defined resampling dimension. @@ -341,4 +351,11 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs): Array with summarized data and the indicated dimension(s) removed. """ - return super().reduce(func, dim, keep_attrs, **kwargs) + return super().reduce( + func=func, + dim=dim, + axis=axis, + keep_attrs=keep_attrs, + keepdims=keepdims, + **kwargs, + ) diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index c032a781e47..392597f1bda 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -676,87 +676,6 @@ def test_multiple_dims(dtype, dask, skipna, func): assert_allclose(actual, expected) -def test_docs(): - # with min_count - actual = DataArray.sum.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `sum` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `sum`. - axis : int or sequence of int, optional - Axis(es) over which to apply `sum`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `sum` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). 
By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - min_count : int, default: None - The required number of valid values to perform the operation. If - fewer than min_count non-NA values are present the result will be - NA. Only used if skipna is set to True or defaults to True for the - array's dtype. New in version 0.10.8: Added with the default being - None. Changed in version 0.17.0: if specified on an integer array - and skipna=True, the result will be a float array. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `sum` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - # without min_count - actual = DataArray.std.__doc__ - expected = dedent( - """\ - Reduce this DataArray's data by applying `std` along some dimension(s). - - Parameters - ---------- - dim : str or sequence of str, optional - Dimension(s) over which to apply `std`. - axis : int or sequence of int, optional - Axis(es) over which to apply `std`. Only one of the 'dim' - and 'axis' arguments can be supplied. If neither are supplied, then - `std` is calculated over axes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - **kwargs : dict - Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. - - Returns - ------- - reduced : DataArray - New DataArray object with `std` applied to its data and the - indicated dimension(s) removed. - """ - ) - assert actual == expected - - def test_datetime_to_numeric_datetime64(): times = pd.date_range("2000", periods=5, freq="7D").values result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h") diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 7f77a677d6d..4d1eee6363d 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -1,10 +1,20 @@ +import sys from unittest import mock -import pkg_resources import pytest from xarray.backends import common, plugins +if sys.version_info >= (3, 8): + from importlib.metadata import EntryPoint + + importlib_metadata_mock = "importlib.metadata" +else: + # if the fallback library is missing, we are doomed. 
+ from importlib_metadata import EntryPoint + + importlib_metadata_mock = "importlib_metadata" + class DummyBackendEntrypointArgs(common.BackendEntrypoint): def open_dataset(filename_or_obj, *args): @@ -29,12 +39,12 @@ def open_dataset(self, filename_or_obj, *, decoder): @pytest.fixture def dummy_duplicated_entrypoints(): specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine1 = xarray.tests.test_plugins:backend_2", - "engine2 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine1", "xarray.tests.test_plugins:backend_2", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - eps = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + eps = [EntryPoint(name, value, group) for name, value, group in specs] return eps @@ -46,8 +56,10 @@ def test_remove_duplicates(dummy_duplicated_entrypoints) -> None: def test_broken_plugin() -> None: - broken_backend = pkg_resources.EntryPoint.parse( - "broken_backend = xarray.tests.test_plugins:backend_1" + broken_backend = EntryPoint( + "broken_backend", + "xarray.tests.test_plugins:backend_1", + "xarray.backends", ) with pytest.warns(RuntimeWarning) as record: _ = plugins.build_engines([broken_backend]) @@ -68,13 +80,15 @@ def test_remove_duplicates_warnings(dummy_duplicated_entrypoints) -> None: assert "entrypoints" in message1 -@mock.patch("pkg_resources.EntryPoint.load", mock.MagicMock(return_value=None)) +@mock.patch( + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=None) +) def test_backends_dict_from_pkg() -> None: specs = [ - "engine1 = xarray.tests.test_plugins:backend_1", - "engine2 = xarray.tests.test_plugins:backend_2", + ["engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends"], + ["engine2", "xarray.tests.test_plugins:backend_2", "xarray.backends"], ] - entrypoints = [pkg_resources.EntryPoint.parse(spec) for spec in specs] + entrypoints = [EntryPoint(name, value, group) for name, value, group in specs] engines = plugins.backends_dict_from_pkg(entrypoints) assert len(engines) == 2 assert engines.keys() == set(("engine1", "engine2")) @@ -114,12 +128,12 @@ def test_set_missing_parameters_raise_error() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines() -> None: - dummy_pkg_entrypoint = pkg_resources.EntryPoint.parse( - "cfgrib = xarray.tests.test_plugins:backend_1" + dummy_pkg_entrypoint = EntryPoint( + "cfgrib", "xarray.tests.test_plugins:backend_1", "xarray_backends" ) backend_entrypoints = plugins.build_engines([dummy_pkg_entrypoint]) @@ -131,17 +145,13 @@ def test_build_engines() -> None: @mock.patch( - "pkg_resources.EntryPoint.load", + f"{importlib_metadata_mock}.EntryPoint.load", mock.MagicMock(return_value=DummyBackendEntrypoint1), ) def test_build_engines_sorted() -> None: dummy_pkg_entrypoints = [ - pkg_resources.EntryPoint.parse( - "dummy2 = xarray.tests.test_plugins:backend_1", - ), - pkg_resources.EntryPoint.parse( - "dummy1 = xarray.tests.test_plugins:backend_1", - ), + EntryPoint("dummy2", "xarray.tests.test_plugins:backend_1", "xarray.backends"), + EntryPoint("dummy1", "xarray.tests.test_plugins:backend_1", "xarray.backends"), ] backend_entrypoints = plugins.build_engines(dummy_pkg_entrypoints) 
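
The plugin tests above stop parsing `pkg_resources` spec strings and instead construct entry points from explicit (name, value, group) triples via `importlib.metadata` (or the `importlib_metadata` backport on Python < 3.8). A small sketch of the equivalence, reusing the same target the tests point at:

from importlib.metadata import EntryPoint  # Python >= 3.8

# Old style: pkg_resources.EntryPoint.parse("engine1 = xarray.tests.test_plugins:backend_1")
# New style: the same information spelled out as name, value and group.
ep = EntryPoint("engine1", "xarray.tests.test_plugins:backend_1", "xarray.backends")
print(ep.name, ep.value, ep.group)
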
backend_entrypoints = list(backend_entrypoints) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 5d4aa2145e1..db4157764ac 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -5,7 +5,7 @@ Usage: python xarray/util/generate_reductions.py > xarray/core/_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true - pytest --doctest-modules xarray/core/_reductions.py --accept + pytest --doctest-modules xarray/core/_reductions.py This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). The second run of pytest is deliberate, since the first will return an error @@ -24,7 +24,6 @@ from . import duck_array_ops from .options import OPTIONS -from .types import T_DataArray, T_Dataset from .utils import contains_only_dask_or_numpy if TYPE_CHECKING: @@ -36,48 +35,48 @@ except ImportError: flox = None''' -OBJ_PREAMBLE = """ +DEFAULT_PREAMBLE = """ + +class {obj}{cls}Reductions: + __slots__ = () -class {obj}Reductions(): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError()""" +GROUPBY_PREAMBLE = """ -class {obj}GroupByReductions(): +class {obj}{cls}Reductions: _obj: "{obj}" def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, ) -> "{obj}": - ... + raise NotImplementedError() def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, ) -> "{obj}": - ...""" + raise NotImplementedError()""" -CLASS_PREAMBLE = """ - -class {obj}{cls}Reductions: - __slots__ = ()""" - TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, @@ -213,6 +212,7 @@ def __init__( docref, docref_description, example_call_preamble, + definition_preamble, see_also_obj=None, ): self.datastructure = datastructure @@ -221,7 +221,7 @@ def __init__( self.docref = docref self.docref_description = docref_description self.example_call_preamble = example_call_preamble - self.preamble = CLASS_PREAMBLE.format(obj=datastructure.name, cls=cls) + self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) if not see_also_obj: self.see_also_obj = self.datastructure.name else: @@ -245,7 +245,6 @@ def generate_method(self, method): yield TEMPLATE_REDUCTION_SIGNATURE.format( **template_kwargs, extra_kwargs=extra_kwargs, - self_type=self.self_type, ) for text in [ @@ -415,6 +414,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGenerator = GenericReductionGenerator( cls="", @@ -424,6 +424,7 @@ class DataStructure: docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="Dataset", + definition_preamble=DEFAULT_PREAMBLE, ) DataArrayGroupByGenerator = GroupByReductionGenerator( @@ -433,6 +434,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DataArrayResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -441,6 +443,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", 
example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetGroupByGenerator = GroupByReductionGenerator( cls="GroupBy", @@ -449,6 +452,7 @@ class DataStructure: docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', + definition_preamble=GROUPBY_PREAMBLE, ) DatasetResampleGenerator = GroupByReductionGenerator( cls="Resample", @@ -457,6 +461,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', + definition_preamble=GROUPBY_PREAMBLE, ) From 2a1b12faf658bcd0079a33fe2d8bac4bc910d8e7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 25 Nov 2021 21:41:21 -0700 Subject: [PATCH 090/138] Update xarray/util/generate_reductions.py Co-authored-by: Mathias Hauser --- xarray/util/generate_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 3a7f35ab567..9301c2e172d 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -80,7 +80,7 @@ def {method}( ----- {notes}""" -_DIM_DOCSTRING = """dim : hashable or iterable of hashable, optional +_DIM_DOCSTRING = """dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions.""" From 45feeaba8f3f19c8740da061a33839c5b2cf2ece Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 25 Nov 2021 21:52:06 -0700 Subject: [PATCH 091/138] Annotate some reduction tests. --- xarray/tests/test_dataarray.py | 4 ++-- xarray/tests/test_dataset.py | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5e9c1b87ce2..82c05acdec9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2392,7 +2392,7 @@ def test_cumops(self): expected = DataArray([[-1, 0, 0], [-3, 0, 0]], coords, dims=["x", "y"]) assert_identical(expected, actual) - def test_reduce(self): + def test_reduce(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2433,7 +2433,7 @@ def test_reduce(self): expected = DataArray(orig.values.astype(int), dims=["x", "y"]).mean("x") assert_equal(actual, expected) - def test_reduce_keepdims(self): + def test_reduce_keepdims(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index cdb8382c8ee..533496a7806 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4432,7 +4432,7 @@ def test_where_drop_no_indexes(self): actual = ds.where(ds == 1, drop=True) assert_identical(expected, actual) - def test_reduce(self): + def test_reduce(self) -> None: data = create_test_data() assert len(data.mean().coords) == 0 @@ -4443,21 +4443,21 @@ def test_reduce(self): assert_equal(data.min(dim=["dim1"]), data.min(dim="dim1")) - for reduct, expected in [ + for reduct, expected_dims in [ ("dim2", ["dim3", "time", "dim1"]), (["dim2", "time"], ["dim3", "dim1"]), (("dim2", "time"), ["dim3", "dim1"]), ((), ["dim2", "dim3", "time", "dim1"]), ]: - actual = list(data.min(dim=reduct).dims) - assert actual == expected + actual_dims = list(data.min(dim=reduct).dims) + assert actual_dims == expected_dims assert_equal(data.mean(dim=[]), data) with pytest.raises(ValueError): data.mean(axis=0) - def 
test_reduce_coords(self): + def test_reduce_coords(self) -> None: # regression test for GH1470 data = xr.Dataset({"a": ("x", [1, 2, 3])}, coords={"b": 4}) expected = xr.Dataset({"a": 2}, coords={"b": 4}) @@ -4481,7 +4481,7 @@ def test_mean_uint_dtype(self): ) assert_identical(actual, expected) - def test_reduce_bad_dim(self): + def test_reduce_bad_dim(self) -> None: data = create_test_data() with pytest.raises(ValueError, match=r"Dataset does not contain"): data.mean(dim="bad_dim") @@ -4516,7 +4516,7 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func): actual = getattr(data, func)(dim=reduct).dims assert list(actual) == expected - def test_reduce_non_numeric(self): + def test_reduce_non_numeric(self) -> None: data1 = create_test_data(seed=44) data2 = create_test_data(seed=44) add_vars = {"var4": ["dim1", "dim2"], "var5": ["dim1"]} @@ -4533,7 +4533,7 @@ def test_reduce_non_numeric(self): @pytest.mark.filterwarnings( "ignore:Once the behaviour of DataArray:DeprecationWarning" ) - def test_reduce_strings(self): + def test_reduce_strings(self) -> None: expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", ["a", "b"])}) ds.coords["y"] = [-10, 10] @@ -4570,7 +4570,7 @@ def test_reduce_strings(self): actual = ds.min() assert_identical(expected, actual) - def test_reduce_dtypes(self): + def test_reduce_dtypes(self) -> None: # regression test for GH342 expected = Dataset({"x": 1}) actual = Dataset({"x": True}).sum() @@ -4585,7 +4585,7 @@ def test_reduce_dtypes(self): actual = Dataset({"x": ("y", [1, 1j])}).sum() assert_identical(expected, actual) - def test_reduce_keep_attrs(self): + def test_reduce_keep_attrs(self) -> None: data = create_test_data() _attrs = {"attr1": "value1", "attr2": 2929} @@ -4627,7 +4627,7 @@ def test_reduce_scalars(self): actual = ds.var("a") assert_identical(expected, actual) - def test_reduce_only_one_axis(self): + def test_reduce_only_one_axis(self) -> None: def mean_only_one_axis(x, axis): if not isinstance(axis, integer_types): raise TypeError("non-integer axis") @@ -4643,7 +4643,7 @@ def mean_only_one_axis(x, axis): ): ds.reduce(mean_only_one_axis) - def test_reduce_no_axis(self): + def test_reduce_no_axis(self) -> None: def total_sum(x): return np.sum(x.flatten()) @@ -4655,7 +4655,7 @@ def total_sum(x): with pytest.raises(TypeError, match=r"unexpected keyword argument 'axis'"): ds.reduce(total_sum, dim="x") - def test_reduce_keepdims(self): + def test_reduce_keepdims(self) -> None: ds = Dataset( {"a": (["x", "y"], [[0, 1, 2, 3, 4]])}, coords={ From 3dc94ae47165616878023b56a02b731549f920db Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 21 Dec 2021 21:42:00 +0100 Subject: [PATCH 092/138] force keyword args after dim --- xarray/util/generate_reductions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index fbf2d82dd2c..413c745194f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -49,7 +49,8 @@ def reduce( TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, - dim: Union[None, Hashable, Sequence[Hashable]] = None,{extra_kwargs} + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *,{extra_kwargs} keep_attrs: bool = None, **kwargs, ) -> "{obj}": From bc55db33e63b10ef01e7c12518fcd7731e6d7d96 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 21 Dec 2021 22:48:57 +0100 Subject: [PATCH 093/138] Write to file using open() 
instead. --- xarray/util/generate_reductions.py | 32 ++++++++++++++++++------------ 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 413c745194f..58335a11056 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -3,7 +3,7 @@ For internal xarray development use only. Usage: - python xarray/util/generate_reductions.py > xarray/core/_reductions.py + python xarray/util/generate_reductions.py pytest --doctest-modules xarray/core/_reductions.py --accept || true pytest --doctest-modules xarray/core/_reductions.py @@ -380,15 +380,21 @@ class DataStructure: if __name__ == "__main__": - print(MODULE_PREAMBLE) - for gen in [ - DatasetGenerator, - DataArrayGenerator, - DatasetGroupByGenerator, - DatasetResampleGenerator, - DataArrayGroupByGenerator, - DataArrayResampleGenerator, - ]: - for lines in gen.generate_methods(): - for line in lines: - print(line) + import os + from pathlib import Path + + p = Path(os.getcwd()) + filepath = p.parent / "core" / "_reductions.py" + with open(filepath, mode="w", encoding="utf-8") as f: + f.write(MODULE_PREAMBLE + "\n") + for gen in [ + DatasetGenerator, + DataArrayGenerator, + DatasetGroupByGenerator, + DatasetResampleGenerator, + DataArrayGroupByGenerator, + DataArrayResampleGenerator, + ]: + for lines in gen.generate_methods(): + for line in lines: + f.write(line + "\n") From b78df18bd11c496a4d5666c6972fb5d2f86e29d3 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 21 Dec 2021 22:49:07 +0100 Subject: [PATCH 094/138] Update _reductions.py --- xarray/core/_reductions.py | 72 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 3 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 5389aa4e09d..bd3d8e381d1 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -28,6 +28,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -86,7 +87,7 @@ def count( Dimensions: () Data variables: - da int64 5 + da int32 5 """ return self.reduce( duck_array_ops.count, @@ -99,6 +100,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -170,6 +172,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -241,6 +244,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -327,6 +331,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -413,6 +418,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -503,6 +509,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -609,6 +616,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -715,6 +723,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 
0, keep_attrs: bool = None, @@ -818,6 +827,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -921,6 +931,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -1027,6 +1038,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -1092,6 +1104,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -1157,6 +1170,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -1222,6 +1236,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -1300,6 +1315,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -1378,6 +1394,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -1460,6 +1477,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -1556,6 +1574,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -1652,6 +1671,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -1745,6 +1765,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -1838,6 +1859,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -1936,6 +1958,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -1996,7 +2019,7 @@ def count( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 1 2 2 + da (labels) int32 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -2009,6 +2032,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -2082,6 +2106,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -2155,6 +2180,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -2245,6 +2271,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -2335,6 +2362,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -2429,6 +2457,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ 
-2541,6 +2570,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -2653,6 +2683,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -2762,6 +2793,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -2871,6 +2903,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -2981,6 +3014,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -3041,7 +3075,7 @@ def count( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 3 1 + da (time) int32 1 3 1 """ return self.reduce( duck_array_ops.count, @@ -3054,6 +3088,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -3127,6 +3162,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "Dataset": @@ -3200,6 +3236,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -3290,6 +3327,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -3380,6 +3418,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -3474,6 +3513,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -3586,6 +3626,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -3698,6 +3739,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -3807,6 +3849,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -3916,6 +3959,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -4026,6 +4070,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -4093,6 +4138,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -4160,6 +4206,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -4227,6 +4274,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -4309,6 +4357,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ 
-4391,6 +4440,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -4477,6 +4527,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -4579,6 +4630,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -4681,6 +4733,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -4780,6 +4833,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -4879,6 +4933,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -4981,6 +5036,7 @@ def reduce( def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -5048,6 +5104,7 @@ def count( def all( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -5115,6 +5172,7 @@ def all( def any( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, ) -> "DataArray": @@ -5182,6 +5240,7 @@ def any( def max( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -5264,6 +5323,7 @@ def max( def min( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -5346,6 +5406,7 @@ def min( def mean( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, @@ -5432,6 +5493,7 @@ def mean( def prod( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -5534,6 +5596,7 @@ def prod( def sum( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, @@ -5636,6 +5699,7 @@ def sum( def std( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -5735,6 +5799,7 @@ def std( def var( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, ddof: int = 0, keep_attrs: bool = None, @@ -5834,6 +5899,7 @@ def var( def median( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, skipna: bool = None, keep_attrs: bool = None, **kwargs, From 74064b957cb7c5d0dacec9708d17126fa11886bd Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Tue, 21 Dec 2021 23:03:05 +0100 Subject: [PATCH 095/138] manual tweaks to make ci happy --- xarray/core/_reductions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index bd3d8e381d1..3aab793fdf2 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -87,7 +87,7 @@ def count( Dimensions: () Data variables: - da int32 5 + da int64 5 """ return self.reduce( duck_array_ops.count, @@ -2019,7 +2019,7 @@ def count( Coordinates: * labels 
(labels) object 'a' 'b' 'c' Data variables: - da (labels) int32 1 2 2 + da (labels) int64 1 2 2 """ return self.reduce( duck_array_ops.count, @@ -3075,7 +3075,7 @@ def count( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int32 1 3 1 + da (time) int64 1 3 1 """ return self.reduce( duck_array_ops.count, From e1ba8a2affa270b72b28812db8d53eebb32e3d29 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 28 Dec 2021 11:27:29 -0700 Subject: [PATCH 096/138] =?UTF-8?q?use=5Fnumpy=5Fgroupies=20=E2=86=92=20us?= =?UTF-8?q?e=5Fflox?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xarray/core/_reductions.py | 240 +++++------------------------ xarray/core/options.py | 10 +- xarray/tests/test_groupby.py | 12 +- xarray/util/generate_reductions.py | 6 +- 4 files changed, 52 insertions(+), 216 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index c56e76cf5d3..58fca69b187 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2013,11 +2013,7 @@ def count( da (labels) int64 1 2 2 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="count", dim=dim, @@ -2101,11 +2097,7 @@ def all( da (labels) bool False True True """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="all", dim=dim, @@ -2189,11 +2181,7 @@ def any( da (labels) bool True True True """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="any", dim=dim, @@ -2293,11 +2281,7 @@ def max( da (labels) float64 nan 2.0 3.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="max", dim=dim, @@ -2399,11 +2383,7 @@ def min( da (labels) float64 nan 2.0 1.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="min", dim=dim, @@ -2509,11 +2489,7 @@ def mean( da (labels) float64 nan 2.0 2.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="mean", dim=dim, @@ -2636,11 +2612,7 @@ def prod( da (labels) float64 nan 4.0 3.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="prod", dim=dim, @@ -2765,11 +2737,7 @@ def sum( da (labels) float64 nan 4.0 4.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="sum", dim=dim, @@ -2891,11 +2859,7 @@ def std( da (labels) float64 nan 0.0 1.414 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and 
contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="std", dim=dim, @@ -3017,11 +2981,7 @@ def var( da (labels) float64 nan 0.0 2.0 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="var", dim=dim, @@ -3226,11 +3186,7 @@ def count( da (time) int64 1 3 1 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="count", dim=dim, @@ -3314,11 +3270,7 @@ def all( da (time) bool True True False """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="all", dim=dim, @@ -3402,11 +3354,7 @@ def any( da (time) bool True True True """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="any", dim=dim, @@ -3506,11 +3454,7 @@ def max( da (time) float64 1.0 3.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="max", dim=dim, @@ -3612,11 +3556,7 @@ def min( da (time) float64 1.0 1.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="min", dim=dim, @@ -3722,11 +3662,7 @@ def mean( da (time) float64 1.0 2.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="mean", dim=dim, @@ -3849,11 +3785,7 @@ def prod( da (time) float64 nan 6.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="prod", dim=dim, @@ -3978,11 +3910,7 @@ def sum( da (time) float64 nan 6.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="sum", dim=dim, @@ -4104,11 +4032,7 @@ def std( da (time) float64 nan 1.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="std", dim=dim, @@ -4230,11 +4154,7 @@ def var( da (time) float64 nan 1.0 nan """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="var", dim=dim, @@ -4434,11 +4354,7 @@ def count( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if 
flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="count", dim=dim, @@ -4515,11 +4431,7 @@ def all( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="all", dim=dim, @@ -4596,11 +4508,7 @@ def any( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="any", dim=dim, @@ -4691,11 +4599,7 @@ def max( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="max", dim=dim, @@ -4788,11 +4692,7 @@ def min( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="min", dim=dim, @@ -4889,11 +4789,7 @@ def mean( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="mean", dim=dim, @@ -5005,11 +4901,7 @@ def prod( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="prod", dim=dim, @@ -5123,11 +5015,7 @@ def sum( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="sum", dim=dim, @@ -5238,11 +5126,7 @@ def std( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="std", dim=dim, @@ -5353,11 +5237,7 @@ def var( * labels (labels) object 'a' 'b' 'c' """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="var", dim=dim, @@ -5547,11 +5427,7 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="count", dim=dim, @@ -5628,11 +5504,7 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="all", dim=dim, @@ -5709,11 +5581,7 @@ def any( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 
""" - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="any", dim=dim, @@ -5804,11 +5672,7 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="max", dim=dim, @@ -5901,11 +5765,7 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="min", dim=dim, @@ -6002,11 +5862,7 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="mean", dim=dim, @@ -6118,11 +5974,7 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="prod", dim=dim, @@ -6236,11 +6088,7 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="sum", dim=dim, @@ -6351,11 +6199,7 @@ def std( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="std", dim=dim, @@ -6466,11 +6310,7 @@ def var( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="var", dim=dim, diff --git a/xarray/core/options.py b/xarray/core/options.py index 20ec34e764e..e9657214f6b 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -35,7 +35,7 @@ class T_Options(TypedDict): keep_attrs: Literal["default", True, False] warn_for_unclosed_files: bool use_bottleneck: bool - use_numpy_groupies: bool + use_flox: bool OPTIONS: T_Options = { @@ -53,7 +53,7 @@ class T_Options(TypedDict): "file_cache_maxsize": 128, "keep_attrs": "default", "use_bottleneck": True, - "use_numpy_groupies": True, + "use_flox": True, "warn_for_unclosed_files": False, } @@ -78,7 +78,7 @@ def _positive_integer(value): "file_cache_maxsize": _positive_integer, "keep_attrs": lambda choice: choice in [True, False, "default"], "use_bottleneck": lambda value: isinstance(value, bool), - "use_numpy_groupies": lambda value: isinstance(value, bool), + "use_flox": lambda value: isinstance(value, bool), "warn_for_unclosed_files": lambda value: isinstance(value, bool), } @@ -186,8 +186,8 @@ class set_options: use_bottleneck : bool, default: True Whether to use 
``bottleneck`` to accelerate 1D reductions and 1D rolling reduction operations. - use_numpy_groupies : bool, default: True - Whether to use ``numpy_groupies`` and ``flox`` to + use_flox : bool, default: True + Whether to use ``numpy_groupies`` and `flox`` to accelerate groupby and resampling reductions. warn_for_unclosed_files : bool, default: False Whether or not to issue a warning when unclosed files are diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index f188c45698f..171dfa49c1b 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -948,10 +948,10 @@ def test_groupby_reductions(self, method): } )["foo"] - with xr.set_options(use_numpy_groupies=False): + with xr.set_options(use_flox=False): actual_legacy = getattr(grouped, method)(dim="y") - with xr.set_options(use_numpy_groupies=True): + with xr.set_options(use_flox=True): actual_npg = getattr(grouped, method)(dim="y") assert_allclose(expected, actual_legacy) @@ -1179,9 +1179,9 @@ def test_groupby_bins(self): da = xr.DataArray(np.ones((2, 3, 4))) bins = [-1, 0, 1, 2] - with xr.set_options(use_numpy_groupies=False): + with xr.set_options(use_flox=False): actual = da.groupby_bins("dim_0", bins).mean(...) - with xr.set_options(use_numpy_groupies=True): + with xr.set_options(use_flox=True): expected = da.groupby_bins("dim_0", bins).mean(...) assert_allclose(actual, expected) @@ -1238,9 +1238,9 @@ def test_groupby_bins_sort(self): binned_mean = data.groupby_bins("x", bins=11).mean() assert binned_mean.to_index().is_monotonic - with xr.set_options(use_numpy_groupies=True): + with xr.set_options(use_flox=True): actual = data.groupby_bins("x", bins=11).count() - with xr.set_options(use_numpy_groupies=False): + with xr.set_options(use_flox=False): expected = data.groupby_bins("x", bins=11).count() assert_identical(actual, expected) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index db4157764ac..850d96d9a90 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -346,11 +346,7 @@ def generate_code(self, method): else: return f""" - if ( - flox - and OPTIONS["use_numpy_groupies"] - and contains_only_dask_or_numpy(self._obj) - ): + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): return self._flox_reduce( func="{method.name}", dim=dim,{extra_kwargs} From bdb999fa4d564148bfd669d8f91c4c509443cb9a Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 Dec 2021 09:47:59 -0700 Subject: [PATCH 097/138] fix tests --- xarray/core/groupby.py | 4 +++- xarray/tests/test_groupby.py | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8c0bde3a4f9..dee2e2add3f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -552,7 +552,9 @@ def _flox_reduce(self, dim, **kwargs): # TODO: switch to xindexes after we can use is_unique index = self._obj.indexes[self._group.name] if index.is_unique and self._squeeze: - raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") + raise ValueError( + f"Cannot reduce over absent dimensions {self._group.name!r}" + ) # TODO: only do this for resample, not general groupers... 
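The test changes above switch between the legacy groupby path and the flox-backed one via the renamed option. A minimal sketch of exercising it (the toy DataArray below is invented for illustration; with ``use_flox=True`` but flox not installed, the ``if flox and OPTIONS["use_flox"] ...`` guard simply falls back to the legacy path):

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(6.0),
    dims="time",
    coords={"labels": ("time", ["a", "b", "c", "c", "b", "a"])},
)

# Legacy pure-numpy/pandas groupby reduction.
with xr.set_options(use_flox=False):
    legacy = da.groupby("labels").mean()

# flox-accelerated reduction (same result, different engine).
with xr.set_options(use_flox=True):
    accelerated = da.groupby("labels").mean()

xr.testing.assert_allclose(legacy, accelerated)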
# this creates a label DataArray since resample doesn't do that somehow diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 171dfa49c1b..ca1bb4d4e5d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -514,13 +514,13 @@ def test_groupby_grouping_errors() -> None: def test_groupby_reduce_dimension_error(array) -> None: grouped = array.groupby("y") - with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): + with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): grouped.mean() - with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): + with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): grouped.mean("huh") - with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): + with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): grouped.mean(("x", "y", "asd")) grouped = array.groupby("y", squeeze=False) @@ -1656,7 +1656,7 @@ def test_resample_min_count(self): ], dim=actual["time"], ) - assert_equal(expected, actual) + assert_allclose(expected, actual) def test_resample_by_mean_with_keep_attrs(self): times = pd.date_range("2000-01-01", freq="6H", periods=10) From 70266e176b60763dc84dc8c6852039014c0032c6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 Dec 2021 11:30:34 -0700 Subject: [PATCH 098/138] fix tests --- xarray/core/groupby.py | 16 +++++++++++++--- xarray/tests/test_groupby.py | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index dee2e2add3f..0d983adea1e 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -552,9 +552,7 @@ def _flox_reduce(self, dim, **kwargs): # TODO: switch to xindexes after we can use is_unique index = self._obj.indexes[self._group.name] if index.is_unique and self._squeeze: - raise ValueError( - f"Cannot reduce over absent dimensions {self._group.name!r}" - ) + raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") # TODO: only do this for resample, not general groupers... # this creates a label DataArray since resample doesn't do that somehow @@ -577,6 +575,18 @@ def _flox_reduce(self, dim, **kwargs): else: group = self._unstacked_group + # Do this so we raise the same error message whether flox is present or not. + # Better to control it here than in flox. 
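The lines added below normalize ``dim`` and validate it up front so the same error is raised with or without flox. Roughly, and with simplified names (``_check_reduce_dims`` is an illustrative standalone helper, not part of the patch), the added check behaves like:

def _check_reduce_dims(dim, group_dims, obj_dims):
    # None and Ellipsis mean "reduce over everything" and pass through.
    if dim in (None, Ellipsis):
        return
    # A single dimension name is treated as a one-element tuple.
    if isinstance(dim, str):
        dim = (dim,)
    # Reject names present neither on the group variable nor on the object
    # being reduced, mirroring the non-flox error message.
    if any(d not in group_dims and d not in obj_dims for d in dim):
        raise ValueError(f"cannot reduce over dimensions {dim}.")

_check_reduce_dims("y", group_dims={"y"}, obj_dims={"x", "y"})  # passes
try:
    _check_reduce_dims(("x", "y", "asd"), group_dims={"y"}, obj_dims={"x", "y"})
except ValueError as err:
    print(err)  # cannot reduce over dimensions ('x', 'y', 'asd').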
+ if isinstance(group, str): + group = self._original_obj[group] + if dim not in (None, Ellipsis): + if isinstance(dim, str): + dim = (dim,) + if any( + d not in group.dims and d not in self._original_obj.dims for d in dim + ): + raise ValueError(f"cannot reduce over dimensions {dim}.") + # TODO: handle bins=N in dask_groupby if self._bins is not None: expected_groups = (self._bins,) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index ca1bb4d4e5d..ad190b9ed17 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -514,13 +514,13 @@ def test_groupby_grouping_errors() -> None: def test_groupby_reduce_dimension_error(array) -> None: grouped = array.groupby("y") - with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): + with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): grouped.mean() - with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): + with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): grouped.mean("huh") - with pytest.raises(ValueError, match=r"Cannot reduce over absent dimensions"): + with pytest.raises(ValueError, match=r"cannot reduce over dimensions"): grouped.mean(("x", "y", "asd")) grouped = array.groupby("y", squeeze=False) From 41e43fe22e39179310032b7b9927e094721e332f Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 29 Dec 2021 17:08:38 -0700 Subject: [PATCH 099/138] fix tests --- xarray/core/groupby.py | 5 ++++- xarray/tests/test_units.py | 8 ++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 0d983adea1e..c79ef2778b8 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -587,7 +587,7 @@ def _flox_reduce(self, dim, **kwargs): ): raise ValueError(f"cannot reduce over dimensions {dim}.") - # TODO: handle bins=N in dask_groupby + # TODO: handle bins=N in flox if self._bins is not None: expected_groups = (self._bins,) isbin = (True,) @@ -600,6 +600,9 @@ def _flox_reduce(self, dim, **kwargs): # note min_count makes no sense in the xarray world # as a kwarg for count, so this should be OK kwargs["min_count"] = 1 + # empty bins have np.nan regardless of dtype + # flox's default would not set np.nan for integer dtypes + kwargs.setdefault("fill_value", np.nan) else: expected_groups = (self._unique_coord.values,) isbin = False diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index f36143c52c3..faaf3e50a81 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -5320,8 +5320,12 @@ def test_computation_objects(self, func, variant, dtype): units = extract_units(ds) args = [] if func.name != "groupby" else ["y"] - expected = attach_units(func(strip_units(ds)).mean(*args), units) - actual = func(ds).mean(*args) + # Doesn't work with flox because pint doesn't implement + # ufunc.reduceat or np.bincount + # kwargs = {"engine": "numpy"} if "groupby" in func.name else {} + kwargs = {} + expected = attach_units(func(strip_units(ds)).mean(*args, **kwargs), units) + actual = func(ds).mean(*args, **kwargs) assert_units_equal(expected, actual) assert_allclose(expected, actual) From 7b34077c1355d006155abf3cdb5bc289caae96a9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 16 Feb 2022 09:50:49 -0700 Subject: [PATCH 100/138] Fix path --- xarray/core/_reductions.py | 144 ++++++++++++++--------------- xarray/util/generate_reductions.py | 2 +- 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/xarray/core/_reductions.py 
b/xarray/core/_reductions.py index 3aab793fdf2..31365f39e65 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -23,7 +23,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "Dataset": - return NotImplemented + raise NotImplementedError() def count( self, @@ -37,7 +37,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -109,7 +109,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -181,7 +181,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -254,7 +254,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -341,7 +341,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -428,7 +428,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -520,7 +520,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -627,7 +627,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -734,7 +734,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -838,7 +838,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
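Besides the ``optional`` -> ``default: None`` docstring wording, the regenerated stubs now ``raise NotImplementedError()`` instead of ``return NotImplemented``. ``NotImplemented`` is the sentinel that binary dunder methods return so Python can try the reflected operand; returned from a plain method it just hands the caller an odd object that only fails later. A small illustration (``ReductionsStub`` is an invented stand-in for the generated mixin base classes):

class ReductionsStub:
    def reduce(self, func, dim=None, **kwargs):
        # Dataset, DataArray and their GroupBy/Resample variants override
        # this; raising makes a missing override fail loudly and early.
        raise NotImplementedError()

try:
    ReductionsStub().reduce(sum)
except NotImplementedError:
    print("reduce() must be provided by the concrete subclass")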
skipna : bool, default: None @@ -941,7 +941,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1033,7 +1033,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "DataArray": - return NotImplemented + raise NotImplementedError() def count( self, @@ -1047,7 +1047,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -1113,7 +1113,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -1179,7 +1179,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -1246,7 +1246,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1325,7 +1325,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1404,7 +1404,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1488,7 +1488,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1585,7 +1585,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1682,7 +1682,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, default: None @@ -1776,7 +1776,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1869,7 +1869,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -1953,7 +1953,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "Dataset": - return NotImplemented + raise NotImplementedError() def count( self, @@ -1967,7 +1967,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -2041,7 +2041,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -2115,7 +2115,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -2190,7 +2190,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2281,7 +2281,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2372,7 +2372,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2468,7 +2468,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2581,7 +2581,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, default: None @@ -2694,7 +2694,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2804,7 +2804,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -2913,7 +2913,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3009,7 +3009,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "Dataset": - return NotImplemented + raise NotImplementedError() def count( self, @@ -3023,7 +3023,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -3097,7 +3097,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -3171,7 +3171,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -3246,7 +3246,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3337,7 +3337,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3428,7 +3428,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3524,7 +3524,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, default: None @@ -3637,7 +3637,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3750,7 +3750,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3860,7 +3860,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -3969,7 +3969,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4065,7 +4065,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "DataArray": - return NotImplemented + raise NotImplementedError() def count( self, @@ -4079,7 +4079,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -4147,7 +4147,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -4215,7 +4215,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -4284,7 +4284,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4367,7 +4367,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4450,7 +4450,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, default: None @@ -4538,7 +4538,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4641,7 +4641,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4744,7 +4744,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4844,7 +4844,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -4943,7 +4943,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5031,7 +5031,7 @@ def reduce( keepdims: bool = False, **kwargs: Any, ) -> "DataArray": - return NotImplemented + raise NotImplementedError() def count( self, @@ -5045,7 +5045,7 @@ def count( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -5113,7 +5113,7 @@ def all( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -5181,7 +5181,7 @@ def any( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional @@ -5250,7 +5250,7 @@ def max( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5333,7 +5333,7 @@ def min( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
skipna : bool, default: None @@ -5416,7 +5416,7 @@ def mean( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5504,7 +5504,7 @@ def prod( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5607,7 +5607,7 @@ def sum( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5710,7 +5710,7 @@ def std( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5810,7 +5810,7 @@ def var( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None @@ -5909,7 +5909,7 @@ def median( Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` or ``dim=["x", "y"]``. If None, will reduce over all dimensions. skipna : bool, default: None diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 9382ce17dab..6dc1bc04cec 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -384,7 +384,7 @@ class DataStructure: from pathlib import Path p = Path(os.getcwd()) - filepath = p.parent / "core" / "_reductions.py" + filepath = p.parent / "xarray" / "xarray" / "core" / "_reductions.py" with open(filepath, mode="w", encoding="utf-8") as f: f.write(MODULE_PREAMBLE + "\n") for gen in [ From 9799d87f1e45f2e20a55bab13af63f2d895666ee Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 8 Mar 2022 16:57:55 -0700 Subject: [PATCH 101/138] Apply suggestions from code review Co-authored-by: Stephan Hoyer --- xarray/util/generate_reductions.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 6dc1bc04cec..95b2fc640f9 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -120,7 +120,7 @@ def {method}( ] _NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." 
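The hunks below rename the helper record and the module-level generator instances in ``generate_reductions.py`` to constant-style names (``ExtraKwarg``, ``DATASET_OBJECT``, ``DATAARRAY_GENERATOR``, ...). A rough, self-contained illustration of how one such record is meant to feed a generated signature and its ``self.reduce`` call (``render_method`` and its template strings are simplified stand-ins, not the real generator code):

import collections

ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example")

SKIPNA = ExtraKwarg(
    docs="skipna : bool, default: None\n    If True, skip missing values.",
    kwarg="skipna: bool = None,",
    call="skipna=skipna,",
    example=">>> {calculation}(skipna=False)",
)

def render_method(name, extra_kwargs=(SKIPNA,)):
    # Stitch per-kwarg fragments into a method, roughly the way the real
    # generator assembles signatures, docstrings and reduce() calls.
    sig = " ".join(k.kwarg for k in extra_kwargs)
    call = " ".join(k.call for k in extra_kwargs)
    return (
        f"def {name}(self, dim=None, *, {sig} **kwargs):\n"
        f"    return self.reduce(duck_array_ops.{name}, dim=dim, {call} **kwargs)"
    )

print(render_method("mean"))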
-extra_kwarg = collections.namedtuple("extra_kwarg", "docs kwarg call example") +ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example") skipna = extra_kwarg( docs=_SKIPNA_DOCSTRING, kwarg="skipna: bool = None,", @@ -279,7 +279,8 @@ def generate_code(self, method): extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 12 * " ") else: extra_kwargs = "" - return f""" return self.reduce( + return f"""\ + return self.reduce( duck_array_ops.{method.array_method}, dim=dim,{extra_kwargs} keep_attrs=keep_attrs, @@ -310,7 +311,7 @@ class DataStructure: numeric_only: bool = False -DatasetObject = DataStructure( +DATASET_OBJECT = DataStructure( name="Dataset", docstring_create=""" >>> ds = xr.Dataset(dict(da=da)) @@ -326,7 +327,7 @@ class DataStructure: numeric_only=False, ) -DatasetGenerator = GenericReductionGenerator( +DATASET_GENERATOR = GenericReductionGenerator( cls="", datastructure=DatasetObject, methods=REDUCTION_METHODS, From 1fcd0808d3f925ecd5bb4f03c94d70136a332487 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Mar 2022 16:59:43 -0700 Subject: [PATCH 102/138] Fixes --- xarray/util/generate_reductions.py | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 95b2fc640f9..e79c94e8907 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -121,7 +121,7 @@ def {method}( _NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example") -skipna = extra_kwarg( +skipna = ExtraKwarg( docs=_SKIPNA_DOCSTRING, kwarg="skipna: bool = None,", call="skipna=skipna,", @@ -130,7 +130,7 @@ def {method}( >>> {calculation}(skipna=False)""", ) -min_count = extra_kwarg( +min_count = ExtraKwarg( docs=_MINCOUNT_DOCSTRING, kwarg="min_count: Optional[int] = None,", call="min_count=min_count,", @@ -139,7 +139,7 @@ def {method}( >>> {calculation}(skipna=True, min_count=2)""", ) -ddof = extra_kwarg( +ddof = ExtraKwarg( docs=_DDOF_DOCSTRING, kwarg="ddof: int = 0,", call="ddof=ddof,", @@ -319,7 +319,7 @@ class DataStructure: example_var_name="ds", numeric_only=True, ) -DataArrayObject = DataStructure( +DATAARRAY_OBJECT = DataStructure( name="DataArray", docstring_create=""" >>> da""", @@ -329,16 +329,16 @@ class DataStructure: DATASET_GENERATOR = GenericReductionGenerator( cls="", - datastructure=DatasetObject, + datastructure=DATASET_OBJECT, methods=REDUCTION_METHODS, docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", see_also_obj="DataArray", ) -DataArrayGenerator = GenericReductionGenerator( +DATAARRAY_GENERATOR = GenericReductionGenerator( cls="", - datastructure=DataArrayObject, + datastructure=DATAARRAY_OBJECT, methods=REDUCTION_METHODS, docref="agg", docref_description="reduction or aggregation operations", @@ -346,33 +346,33 @@ class DataStructure: see_also_obj="Dataset", ) -DataArrayGroupByGenerator = GenericReductionGenerator( +DATAARRAY_GROUPBY_GENERATOR = GenericReductionGenerator( cls="GroupBy", - datastructure=DataArrayObject, + datastructure=DATAARRAY_OBJECT, methods=REDUCTION_METHODS, docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', ) -DataArrayResampleGenerator = GenericReductionGenerator( +DATAARRAY_RESAMPLE_GENERATOR = GenericReductionGenerator( cls="Resample", - datastructure=DataArrayObject, + 
datastructure=DATAARRAY_OBJECT, methods=REDUCTION_METHODS, docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', ) -DatasetGroupByGenerator = GenericReductionGenerator( +DATASET_GROUPBY_GENERATOR = GenericReductionGenerator( cls="GroupBy", - datastructure=DatasetObject, + datastructure=DATASET_OBJECT, methods=REDUCTION_METHODS, docref="groupby", docref_description="groupby operations", example_call_preamble='.groupby("labels")', ) -DatasetResampleGenerator = GenericReductionGenerator( +DATASET_RESAMPLE_GENERATOR = GenericReductionGenerator( cls="Resample", - datastructure=DatasetObject, + datastructure=DATASET_OBJECT, methods=REDUCTION_METHODS, docref="resampling", docref_description="resampling operations", @@ -389,12 +389,12 @@ class DataStructure: with open(filepath, mode="w", encoding="utf-8") as f: f.write(MODULE_PREAMBLE + "\n") for gen in [ - DatasetGenerator, - DataArrayGenerator, - DatasetGroupByGenerator, - DatasetResampleGenerator, - DataArrayGroupByGenerator, - DataArrayResampleGenerator, + DATASET_GENERATOR, + DATAARRAY_GENERATOR, + DATASET_GROUPBY_GENERATOR, + DATASET_RESAMPLE_GENERATOR, + DATAARRAY_GROUPBY_GENERATOR, + DATAARRAY_RESAMPLE_GENERATOR, ]: for lines in gen.generate_methods(): for line in lines: From d5f627cf67204b78957a3bbc6147c455e54d080b Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 8 Mar 2022 17:48:00 -0700 Subject: [PATCH 103/138] update _reductions --- xarray/core/_reductions.py | 3527 +++++++++++++++++++++++----- xarray/util/generate_reductions.py | 1 + 2 files changed, 2890 insertions(+), 638 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 83aaa10a20c..31365f39e65 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,43 +1,1975 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops -from .types import T_DataArray, T_Dataset +if TYPE_CHECKING: + from .dataarray import DataArray + from .dataset import Dataset + + +class DatasetReductions: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def count( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
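Note the regenerated module's import block above: the ``Protocol``/``T_Dataset`` typing is replaced by a ``TYPE_CHECKING`` guard plus string return annotations such as ``-> "Dataset"``, which keeps the annotations checkable without importing ``dataset.py`` at runtime (that import would be circular). A minimal sketch of the pattern (``ExampleReductions`` is illustrative only):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen only by type checkers, so no circular import at runtime.
    from .dataset import Dataset

class ExampleReductions:
    def count(self, dim=None) -> "Dataset":
        ...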
+ + Returns + ------- + reduced : Dataset + New Dataset with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.count() + + Dimensions: () + Data variables: + da int64 5 + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.all() + + Dimensions: () + Data variables: + da bool False + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.any() + + Dimensions: () + Data variables: + da bool True + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.max() + + Dimensions: () + Data variables: + da float64 3.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.max(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``min``. For e.g. 
``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.min() + + Dimensions: () + Data variables: + da float64 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.min(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.mean() + + Dimensions: () + Data variables: + da float64 1.8 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.mean(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.prod() + + Dimensions: () + Data variables: + da float64 12.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.prod(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. 
+ + >>> ds.prod(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 12.0 + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.sum() + + Dimensions: () + Data variables: + da float64 9.0 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.sum(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.sum(skipna=True, min_count=2) + + Dimensions: () + Data variables: + da float64 9.0 + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``std``. For e.g. 
``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.std() + + Dimensions: () + Data variables: + da float64 0.7483 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.std(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.std(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.8367 + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : Dataset + New Dataset with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.var() + + Dimensions: () + Data variables: + da float64 0.56 + + Use ``skipna`` to control whether NaNs are ignored. + + >>> ds.var(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.var(skipna=True, ddof=1) + + Dimensions: () + Data variables: + da float64 0.7 + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "Dataset": + """ + Reduce this Dataset's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : Dataset + New Dataset with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.median() + + Dimensions: () + Data variables: + da float64 2.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> ds.median(skipna=False) + + Dimensions: () + Data variables: + da float64 nan + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayReductions: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + + def count( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.count() + + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.all() + + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.any() + + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.max(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.min() + + array(1.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.min(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. 
+ skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.mean() + + array(1.8) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.mean(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.prod() + + array(12.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.prod(skipna=True, min_count=2) + + array(12.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + min_count: Optional[int] = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.sum() + + array(9.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.sum(skipna=True, min_count=2) + + array(9.) 
+ """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.std() + + array(0.74833148) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.std(skipna=True, ddof=1) + + array(0.83666003) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + ddof: int = 0, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. 
+ keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.var() + + array(0.56) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> da.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.var(skipna=True, ddof=1) + + array(0.7) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + skipna: bool = None, + keep_attrs: bool = None, + **kwargs, + ) -> "DataArray": + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan]), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> da.median(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DatasetGroupByReductions: + __slots__ = () -class DatasetReduce(Protocol): def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_Dataset: - ... - - -class DatasetGroupByReductions: - __slots__ = () + ) -> "Dataset": + raise NotImplementedError() def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -45,6 +1977,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -52,6 +1985,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -79,13 +2020,6 @@ def count( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) int64 1 2 2 - - See Also - -------- - numpy.count - Dataset.count - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -96,20 +2030,20 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -117,6 +2051,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -124,6 +2059,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`groupby` + User guide on groupby operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -151,13 +2094,6 @@ def all( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool False True True - - See Also - -------- - numpy.all - Dataset.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -168,20 +2104,20 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -189,6 +2125,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -196,6 +2133,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -223,13 +2168,6 @@ def any( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -240,25 +2178,25 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -267,6 +2205,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -274,6 +2213,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`groupby` + User guide on groupby operations. 
+ Examples -------- >>> da = xr.DataArray( @@ -311,13 +2258,6 @@ def max( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 3.0 - - See Also - -------- - numpy.max - Dataset.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -329,25 +2269,25 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -356,6 +2296,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -363,6 +2304,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -400,13 +2349,6 @@ def min( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 1.0 - - See Also - -------- - numpy.min - Dataset.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -418,25 +2360,25 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -445,6 +2387,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -452,6 +2395,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -489,13 +2444,6 @@ def mean( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -507,26 +2455,26 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -541,6 +2489,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -548,6 +2497,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -595,13 +2556,6 @@ def prod( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 3.0 - - See Also - -------- - numpy.prod - Dataset.prod - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -614,26 +2568,26 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). 
Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -648,6 +2602,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -655,6 +2610,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -702,13 +2669,6 @@ def sum( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 4.0 4.0 - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -721,26 +2681,30 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -748,6 +2712,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -755,6 +2720,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -793,43 +2770,51 @@ def std( Data variables: da (labels) float64 nan 0.0 1.0 - See Also - -------- - numpy.std - Dataset.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.groupby("labels").std(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.414 """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -837,6 +2822,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -844,6 +2830,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -882,42 +2880,46 @@ def var( Data variables: da (labels) float64 nan 0.0 1.0 - See Also - -------- - numpy.var - Dataset.var - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> ds.groupby("labels").var(skipna=True, ddof=1) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 2.0 """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -926,6 +2928,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -933,6 +2936,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -970,13 +2985,6 @@ def median( * labels (labels) object 'a' 'b' 'c' Data variables: da (labels) float64 nan 2.0 2.0 - - See Also - -------- - numpy.median - Dataset.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -991,21 +2999,33 @@ def median( class DatasetResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + def count( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -1013,6 +3033,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1020,6 +3041,14 @@ def count( New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + Dataset.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1047,13 +3076,6 @@ def count( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) int64 1 3 1 - - See Also - -------- - numpy.count - Dataset.count - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.count, @@ -1064,20 +3086,20 @@ def count( ) def all( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1085,6 +3107,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1092,6 +3115,14 @@ def all( New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + Dataset.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1119,13 +3150,6 @@ def all( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) bool True True False - - See Also - -------- - numpy.all - Dataset.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -1136,20 +3160,20 @@ def all( ) def any( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1157,6 +3181,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
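# Illustrative sketch, separate from the diff above, of how the ``**kwargs``
# pass-through described in these docstrings might be exercised. ``split_every``
# is a dask.array reduction keyword, so this assumes dask-backed data and an
# installed dask; whether any given keyword is accepted is up to the underlying
# array function, not something the generated method itself checks.
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {"da": ("time", np.arange(6.0))},
    coords={"time": pd.date_range("2001-01-01", freq="M", periods=6)},
)
# Chunk so the variables are dask arrays, then forward a dask-specific keyword
# through the resampled reduction.
ds.chunk({"time": 2}).resample(time="3M").mean(split_every=2)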
Returns ------- @@ -1164,6 +3189,14 @@ def any( New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + Dataset.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1185,19 +3218,12 @@ def any( da (time) bool True True True True True False >>> ds.resample(time="3M").any() - - Dimensions: (time: 3) - Coordinates: - * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - Data variables: - da (time) bool True True True - - See Also - -------- - numpy.any - Dataset.any - :ref:`resampling` - User guide on resampling operations. + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True """ return self.reduce( duck_array_ops.array_any, @@ -1208,25 +3234,25 @@ def any( ) def max( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1235,6 +3261,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1242,6 +3269,14 @@ def max( New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + Dataset.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1279,13 +3314,6 @@ def max( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 3.0 nan - - See Also - -------- - numpy.max - Dataset.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -1297,25 +3325,25 @@ def max( ) def min( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1324,6 +3352,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1331,6 +3360,14 @@ def min( New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + Dataset.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -1368,13 +3405,6 @@ def min( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 1.0 nan - - See Also - -------- - numpy.min - Dataset.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -1386,25 +3416,25 @@ def min( ) def mean( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1413,6 +3443,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1420,6 +3451,18 @@ def mean( New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1457,13 +3500,6 @@ def mean( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.mean - Dataset.mean - :ref:`resampling` - User guide on resampling operations. 
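# Illustrative sketch, separate from the diff above, of the Notes entry that
# non-numeric variables are removed prior to reducing: a string-typed data
# variable does not survive the resampled ``mean``. The variable names here
# are invented for the example.
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {
        "da": ("time", np.arange(6.0)),
        "label": ("time", list("abbcaa")),
    },
    coords={"time": pd.date_range("2001-01-01", freq="M", periods=6)},
)
resampled = ds.resample(time="3M").mean()
list(resampled.data_vars)  # ['da'] -- the non-numeric 'label' variable was dropped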
""" return self.reduce( duck_array_ops.mean, @@ -1475,26 +3511,26 @@ def mean( ) def prod( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1509,6 +3545,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1516,6 +3553,18 @@ def prod( New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1563,13 +3612,6 @@ def prod( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.prod - Dataset.prod - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, @@ -1582,26 +3624,26 @@ def prod( ) def sum( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -1616,6 +3658,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1623,6 +3666,18 @@ def sum( New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1670,13 +3725,6 @@ def sum( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 nan 6.0 nan - - See Also - -------- - numpy.sum - Dataset.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -1689,26 +3737,30 @@ def sum( ) def std( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1716,6 +3768,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1723,6 +3776,18 @@ def std( New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + Dataset.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1761,43 +3826,51 @@ def std( Data variables: da (time) float64 0.0 0.8165 nan - See Also - -------- - numpy.std - Dataset.std - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> ds.resample(time="3M").std(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1805,6 +3878,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1812,6 +3886,18 @@ def var( New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + Dataset.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1850,42 +3936,46 @@ def var( Data variables: da (time) float64 0.0 0.6667 nan - See Also - -------- - numpy.var - Dataset.var - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> ds.resample(time="3M").var(skipna=True, ddof=1) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 1.0 nan """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DatasetReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_Dataset: + ) -> "Dataset": """ Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. 
If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -1894,6 +3984,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -1901,6 +3992,18 @@ def median( New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + Dataset.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -1938,13 +4041,6 @@ def median( * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: da (time) float64 1.0 2.0 nan - - See Also - -------- - numpy.median - Dataset.median - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.median, @@ -1956,37 +4052,36 @@ def median( ) -class DataArrayReduce(Protocol): +class DataArrayGroupByReductions: + __slots__ = () + def reduce( self, func: Callable[..., Any], dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, axis: Union[None, int, Sequence[int]] = None, keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> T_DataArray: - ... - - -class DataArrayGroupByReductions: - __slots__ = () + ) -> "DataArray": + raise NotImplementedError() def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1994,6 +4089,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2001,6 +4097,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2023,13 +4127,6 @@ def count( array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.count - DataArray.count - :ref:`groupby` - User guide on groupby operations. 
""" return self.reduce( duck_array_ops.count, @@ -2039,20 +4136,20 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2060,6 +4157,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2067,6 +4165,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2089,13 +4195,6 @@ def all( array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.all - DataArray.all - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -2105,20 +4204,20 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2126,6 +4225,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2133,6 +4233,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2155,13 +4263,6 @@ def any( array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.any - DataArray.any - :ref:`groupby` - User guide on groupby operations. 
""" return self.reduce( duck_array_ops.array_any, @@ -2171,25 +4272,25 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2198,6 +4299,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2205,6 +4307,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2235,13 +4345,6 @@ def max( array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.max - DataArray.max - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -2252,25 +4355,25 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2279,6 +4382,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
Returns ------- @@ -2286,6 +4390,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`groupby` + User guide on groupby operations. + Examples -------- >>> da = xr.DataArray( @@ -2316,13 +4428,6 @@ def min( array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.min - DataArray.min - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -2333,25 +4438,25 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2360,6 +4465,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2367,6 +4473,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2397,13 +4515,6 @@ def mean( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -2414,26 +4525,26 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2448,6 +4559,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2455,6 +4567,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2493,13 +4617,6 @@ def prod( array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -2511,26 +4628,26 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -2545,6 +4662,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2552,6 +4670,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2590,13 +4720,6 @@ def sum( array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`groupby` - User guide on groupby operations. 
""" return self.reduce( duck_array_ops.sum, @@ -2608,26 +4731,30 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2635,6 +4762,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2642,6 +4770,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2673,42 +4813,48 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' - See Also - -------- - numpy.std - DataArray.std - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").std(skipna=True, ddof=1) + + array([ nan, 0. , 1.41421356]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2716,6 +4862,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2723,6 +4870,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -2754,41 +4913,43 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' - See Also - -------- - numpy.var - DataArray.var - :ref:`groupby` - User guide on groupby operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.groupby("labels").var(skipna=True, ddof=1) + + array([nan, 0., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -2797,6 +4958,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2804,6 +4966,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`groupby` + User guide on groupby operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. 
+ Examples -------- >>> da = xr.DataArray( @@ -2834,13 +5008,6 @@ def median( array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' - - See Also - -------- - numpy.median - DataArray.median - :ref:`groupby` - User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -2854,21 +5021,33 @@ def median( class DataArrayResampleReductions: __slots__ = () + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "DataArray": + raise NotImplementedError() + def count( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``count`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2876,6 +5055,7 @@ def count( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -2883,6 +5063,14 @@ def count( New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.count + dask.array.count + DataArray.count + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -2905,13 +5093,6 @@ def count( array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.count - DataArray.count - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.count, @@ -2921,20 +5102,20 @@ def count( ) def all( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``all`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2942,6 +5123,7 @@ def all( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
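# Illustrative sketch, separate from the diff above, of the ``keep_attrs``
# flag documented in these methods: per the documented default, ``attrs`` are
# dropped unless ``keep_attrs=True`` is passed.
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.arange(6.0),
    dims="time",
    coords={"time": pd.date_range("2001-01-01", freq="M", periods=6)},
    attrs={"units": "K"},
)
da.resample(time="3M").mean().attrs                 # {} -- attrs dropped by default
da.resample(time="3M").mean(keep_attrs=True).attrs  # {'units': 'K'}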
Returns ------- @@ -2949,6 +5131,14 @@ def all( New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.all + dask.array.all + DataArray.all + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -2971,13 +5161,6 @@ def all( array([ True, True, False]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.all - DataArray.all - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, @@ -2987,20 +5170,20 @@ def all( ) def any( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``any`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3008,6 +5191,7 @@ def any( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``any`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3015,6 +5199,14 @@ def any( New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.any + dask.array.any + DataArray.any + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3037,13 +5229,6 @@ def any( array([ True, True, True]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.any - DataArray.any - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.array_any, @@ -3053,25 +5238,25 @@ def any( ) def max( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``max`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3080,6 +5265,7 @@ def max( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3087,6 +5273,14 @@ def max( New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.max + dask.array.max + DataArray.max + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3117,13 +5311,6 @@ def max( array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.max - DataArray.max - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.max, @@ -3134,25 +5321,25 @@ def max( ) def min( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``min`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3161,6 +5348,7 @@ def min( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3168,6 +5356,14 @@ def min( New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.min + dask.array.min + DataArray.min + :ref:`resampling` + User guide on resampling operations. + Examples -------- >>> da = xr.DataArray( @@ -3198,13 +5394,6 @@ def min( array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.min - DataArray.min - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.min, @@ -3215,25 +5404,25 @@ def min( ) def mean( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``mean`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. 
- skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3242,6 +5431,7 @@ def mean( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3249,6 +5439,18 @@ def mean( New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.mean + dask.array.mean + DataArray.mean + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3279,13 +5481,6 @@ def mean( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.mean - DataArray.mean - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, @@ -3296,26 +5491,26 @@ def mean( ) def prod( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``prod`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3330,6 +5525,7 @@ def prod( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3337,6 +5533,18 @@ def prod( New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.prod + dask.array.prod + DataArray.prod + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3375,13 +5583,6 @@ def prod( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.prod - DataArray.prod - :ref:`resampling` - User guide on resampling operations. 
""" return self.reduce( duck_array_ops.prod, @@ -3393,26 +5594,26 @@ def prod( ) def sum( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, min_count: Optional[int] = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``sum`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If @@ -3427,6 +5628,7 @@ def sum( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3434,6 +5636,18 @@ def sum( New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.sum + dask.array.sum + DataArray.sum + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3472,13 +5686,6 @@ def sum( array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.sum - DataArray.sum - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, @@ -3490,26 +5697,30 @@ def sum( ) def std( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``std`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be @@ -3517,6 +5728,7 @@ def std( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``std`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3524,6 +5736,18 @@ def std( New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.std + dask.array.std + DataArray.std + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3555,42 +5779,48 @@ def std( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - See Also - -------- - numpy.std - DataArray.std - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. + + >>> da.resample(time="3M").std(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def var( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, + ddof: int = 0, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``var`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3598,6 +5828,7 @@ def var( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3605,6 +5836,18 @@ def var( New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.var + dask.array.var + DataArray.var + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3636,41 +5879,43 @@ def var( Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - See Also - -------- - numpy.var - DataArray.var - :ref:`resampling` - User guide on resampling operations. + Specify ``ddof=1`` for an unbiased estimate. 
+ + >>> da.resample(time="3M").var(skipna=True, ddof=1) + + array([nan, 1., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + ddof=ddof, keep_attrs=keep_attrs, **kwargs, ) def median( - self: DataArrayReduce, + self, dim: Union[None, Hashable, Sequence[Hashable]] = None, - skipna: bool = True, + *, + skipna: bool = None, keep_attrs: bool = None, **kwargs, - ) -> T_DataArray: + ) -> "DataArray": """ Reduce this DataArray's data by applying ``median`` along some dimension(s). Parameters ---------- - dim : hashable or iterable of hashable, optional + dim : hashable or iterable of hashable, default: None Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` - or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions - present in the grouped variable. - skipna : bool, optional + or ``dim=["x", "y"]``. If None, will reduce over all dimensions. + skipna : bool, default: None If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been + have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional If True, ``attrs`` will be copied from the original @@ -3679,6 +5924,7 @@ def median( **kwargs : dict Additional keyword arguments passed on to the appropriate array function for calculating ``median`` on this object's data. + These could include dask-specific kwargs like ``split_every``. Returns ------- @@ -3686,6 +5932,18 @@ def median( New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + See Also + -------- + numpy.median + dask.array.median + DataArray.median + :ref:`resampling` + User guide on resampling operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + Examples -------- >>> da = xr.DataArray( @@ -3716,13 +5974,6 @@ def median( array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 - - See Also - -------- - numpy.median - DataArray.median - :ref:`resampling` - User guide on resampling operations. """ return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 2e5d8348c85..e79c94e8907 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -30,6 +30,7 @@ CLASS_PREAMBLE = """ + class {obj}{cls}Reductions: __slots__ = () From 434db03e2f0d8e2c30b991c0866b80eb46348621 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 10 Mar 2022 09:47:58 -0700 Subject: [PATCH 104/138] polish --- xarray/util/generate_reductions.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index e79c94e8907..1e10902812e 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -113,11 +113,7 @@ def {method}( These could include dask-specific kwargs like ``split_every``.""" NAN_CUM_METHODS = ["cumsum", "cumprod"] - -NUMERIC_ONLY_METHODS = [ - "cumsum", - "cumprod", -] +NUMERIC_ONLY_METHODS = ["cumsum", "cumprod"] _NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." 
ExtraKwarg = collections.namedtuple("ExtraKwarg", "docs kwarg call example") From a1769baf158629e84b6258069a22904de8f37fc4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 13 Mar 2022 09:34:58 +0530 Subject: [PATCH 105/138] polish --- xarray/util/generate_reductions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 42175215cdb..2906f40b6c6 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -35,7 +35,6 @@ except ImportError: flox = None''' - DEFAULT_PREAMBLE = """ class {obj}{cls}Reductions: @@ -133,7 +132,6 @@ def {method}( “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, where ``N`` represents the number of elements.""" - _KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -289,6 +287,7 @@ def generate_example(self, method): ).format(calculation=calculation, method=method.name) else: extra_examples = "" + return f""" Examples --------{create_da}{self.datastructure.docstring_create} @@ -421,7 +420,6 @@ class DataStructure: see_also_obj="Dataset", definition_preamble=DEFAULT_PREAMBLE, ) - DATAARRAY_GROUPBY_GENERATOR = GroupByReductionGenerator( cls="GroupBy", datastructure=DATAARRAY_OBJECT, @@ -441,7 +439,6 @@ class DataStructure: example_call_preamble='.resample(time="3M")', definition_preamble=GROUPBY_PREAMBLE, ) - DATASET_GROUPBY_GENERATOR = GenericReductionGenerator( cls="GroupBy", datastructure=DATASET_OBJECT, From 26d85d5260a38e00f903e689b3819d51fb29f689 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 13 Mar 2022 09:43:45 +0530 Subject: [PATCH 106/138] loooser test --- xarray/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 024edf99510..41bd7631c22 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1914,7 +1914,7 @@ def test_resample_ds_da_are_the_same(self): "x": np.arange(5), } ) - assert_identical( + assert_allclose( ds.resample(time="M").mean()["foo"], ds.foo.resample(time="M").mean() ) From e4125831e4c86df4752666bf27344fce99c77b02 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 13 Mar 2022 11:10:30 +0530 Subject: [PATCH 107/138] Fix. 
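The change below makes every generated groupby/resample reduction try a flox-backed code path first and fall back to ``self.reduce`` otherwise; the two paths are meant to be interchangeable from the user's side. A minimal sketch of what that means in practice (illustrative data; it assumes flox is installed so the ``use_flox=True`` branch actually takes the fast path):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"a": ("x", np.arange(5.0))}, coords={"c": ("x", [0, 0, 1, 1, 1])})

    with xr.set_options(use_flox=True):   # dispatches through _flox_reduce when possible
        fast = ds.groupby("c").mean()
    with xr.set_options(use_flox=False):  # always uses the duck_array_ops fallback
        slow = ds.groupby("c").mean()

    xr.testing.assert_identical(fast, slow)  # both code paths should agree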
--- xarray/core/_reductions.py | 546 ++++++++++++++++++++--------- xarray/util/generate_reductions.py | 5 +- 2 files changed, 386 insertions(+), 165 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 50bff78be28..7df2fc16746 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -2035,13 +2035,23 @@ def count( Data variables: da (labels) int64 1 2 2 """ - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="count", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def all( self, @@ -2109,13 +2119,23 @@ def all( Data variables: da (labels) bool False True True """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="all", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self, @@ -2183,13 +2203,23 @@ def any( Data variables: da (labels) bool True True True """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="any", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self, @@ -2273,14 +2303,25 @@ def max( Data variables: da (labels) float64 nan 2.0 3.0 """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="max", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self, @@ -2364,14 +2405,25 @@ def min( Data variables: da (labels) float64 nan 2.0 1.0 """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="min", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self, @@ -2459,14 +2511,25 @@ def mean( Data variables: da (labels) float64 nan 2.0 2.0 """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and 
contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="mean", + dim=dim, + skipna=skipna, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self, @@ -2571,15 +2634,27 @@ def prod( Data variables: da (labels) float64 nan 4.0 3.0 """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="prod", + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self, @@ -2684,15 +2759,27 @@ def sum( Data variables: da (labels) float64 nan 4.0 4.0 """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="sum", + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self, @@ -2794,15 +2881,27 @@ def std( Data variables: da (labels) float64 nan 0.0 1.414 """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="std", + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self, @@ -2904,15 +3003,27 @@ def var( Data variables: da (labels) float64 nan 0.0 2.0 """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="var", + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self, @@ -3098,13 +3209,23 @@ def count( Data variables: da (time) int64 1 3 1 """ - return self.reduce( - duck_array_ops.count, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="count", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + 
keep_attrs=keep_attrs, + **kwargs, + ) def all( self, @@ -3172,13 +3293,23 @@ def all( Data variables: da (time) bool True True False """ - return self.reduce( - duck_array_ops.array_all, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="all", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def any( self, @@ -3246,13 +3377,23 @@ def any( Data variables: da (time) bool True True True """ - return self.reduce( - duck_array_ops.array_any, - dim=dim, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="any", + dim=dim, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def max( self, @@ -3336,14 +3477,25 @@ def max( Data variables: da (time) float64 1.0 3.0 nan """ - return self.reduce( - duck_array_ops.max, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="max", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def min( self, @@ -3427,14 +3579,25 @@ def min( Data variables: da (time) float64 1.0 1.0 nan """ - return self.reduce( - duck_array_ops.min, - dim=dim, - skipna=skipna, - numeric_only=False, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="min", + dim=dim, + skipna=skipna, + numeric_only=False, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) def mean( self, @@ -3522,14 +3685,25 @@ def mean( Data variables: da (time) float64 1.0 2.0 nan """ - return self.reduce( - duck_array_ops.mean, - dim=dim, - skipna=skipna, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="mean", + dim=dim, + skipna=skipna, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def prod( self, @@ -3634,15 +3808,27 @@ def prod( Data variables: da (time) float64 nan 6.0 nan """ - return self.reduce( - duck_array_ops.prod, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="prod", + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + 
else: + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def sum( self, @@ -3747,15 +3933,27 @@ def sum( Data variables: da (time) float64 nan 6.0 nan """ - return self.reduce( - duck_array_ops.sum, - dim=dim, - skipna=skipna, - min_count=min_count, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="sum", + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def std( self, @@ -3857,15 +4055,27 @@ def std( Data variables: da (time) float64 nan 1.0 nan """ - return self.reduce( - duck_array_ops.std, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="std", + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def var( self, @@ -3967,15 +4177,27 @@ def var( Data variables: da (time) float64 nan 1.0 nan """ - return self.reduce( - duck_array_ops.var, - dim=dim, - skipna=skipna, - ddof=ddof, - numeric_only=True, - keep_attrs=keep_attrs, - **kwargs, - ) + if flox and OPTIONS["use_flox"] and contains_only_dask_or_numpy(self._obj): + return self._flox_reduce( + func="var", + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + # fill_value=fill_value, + keep_attrs=keep_attrs, + **kwargs, + ) + else: + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) def median( self, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 2906f40b6c6..0b51632343b 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -429,7 +429,6 @@ class DataStructure: example_call_preamble='.groupby("labels")', definition_preamble=GROUPBY_PREAMBLE, ) - DATAARRAY_RESAMPLE_GENERATOR = GroupByReductionGenerator( cls="Resample", datastructure=DATAARRAY_OBJECT, @@ -439,7 +438,7 @@ class DataStructure: example_call_preamble='.resample(time="3M")', definition_preamble=GROUPBY_PREAMBLE, ) -DATASET_GROUPBY_GENERATOR = GenericReductionGenerator( +DATASET_GROUPBY_GENERATOR = GroupByReductionGenerator( cls="GroupBy", datastructure=DATASET_OBJECT, methods=REDUCTION_METHODS, @@ -448,7 +447,7 @@ class DataStructure: example_call_preamble='.groupby("labels")', definition_preamble=GROUPBY_PREAMBLE, ) -DATASET_RESAMPLE_GENERATOR = GenericReductionGenerator( +DATASET_RESAMPLE_GENERATOR = GroupByReductionGenerator( cls="Resample", datastructure=DATASET_OBJECT, methods=REDUCTION_METHODS, From 2694dbea60310d83a02e91727c1de71f0eec4ec6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 13 Mar 2022 11:10:49 +0530 Subject: [PATCH 108/138] Test flox kwargs --- xarray/tests/__init__.py | 2 ++ xarray/tests/test_groupby.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/xarray/tests/__init__.py 
b/xarray/tests/__init__.py index 00fec07f793..f71ed6082a4 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,6 +78,8 @@ def _importorskip(modname, minversion=None): has_cartopy, requires_cartopy = _importorskip("cartopy") has_pint, requires_pint = _importorskip("pint") has_numexpr, requires_numexpr = _importorskip("numexpr") +has_flox, requires_flox = _importorskip("flox") + # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 41bd7631c22..f119e706cde 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -17,6 +17,7 @@ assert_identical, create_test_data, requires_dask, + requires_flox, requires_scipy, ) @@ -928,6 +929,17 @@ def test_groupby_dataset_assign(): assert_identical(actual, expected) +@requires_flox +@pytest.mark.parametrize("kwargs", [{"method": "map-reduce"}, {"engine": "numpy"}]) +def test_groupby_flox_kwargs(kwargs): + ds = Dataset({"a": ("x", range(5))}, {"c": ("x", [0, 0, 1, 1, 1])}) + with xr.set_options(use_flox=False): + expected = ds.groupby("c").mean() + with xr.set_options(use_flox=True): + actual = ds.groupby("c").mean(**kwargs) + assert_identical(expected, actual) + + class TestDataArrayGroupBy: @pytest.fixture(autouse=True) def setup(self): From fd6aa17743cb360c6587309fa92ac0fd290a590b Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 30 Mar 2022 13:23:34 +0530 Subject: [PATCH 109/138] fix --- xarray/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 893f0f01b7e..da86e21c244 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -350,7 +350,7 @@ def __init__( if bins is not None: if duck_array_ops.isnull(bins).all(): raise ValueError("All bin edges are NaN.") - binned, self._bins = pd.cut(group.values, bins, **cut_kwargs, retbins=True) + binned, bins = pd.cut(group.values, bins, **cut_kwargs, retbins=True) new_dim_name = group.name + "_bins" group = DataArray(binned, group.coords, name=new_dim_name) full_index = binned.categories From c176f8d21faa94929db1ee1609e9edc7a9556e41 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 30 Mar 2022 15:52:26 +0530 Subject: [PATCH 110/138] Test cleanup --- xarray/tests/test_groupby.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 182767ecc3a..0029d0401e2 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1028,19 +1028,22 @@ def test_groupby_properties(self): assert_array_equal(expected_groups[key], grouped.groups[key]) assert 3 == len(grouped) - def test_groupby_map_identity(self): + @pytest.mark.parametrize( + "by, use_da", [("x", False), ("y", False), ("y", True), ("abc", False)] + ) + @pytest.mark.parametrize("shortcut", [True, False]) + @pytest.mark.parametrize("squeeze", [True, False]) + def test_groupby_map_identity(self, by, use_da, shortcut, squeeze) -> None: expected = self.da - idx = expected.coords["y"] + if use_da: + by = expected.coords[by] def identity(x): return x - for g in ["x", "y", "abc", idx]: - for shortcut in [False, True]: - for squeeze in [False, True]: - grouped = expected.groupby(g, squeeze=squeeze) - actual = grouped.map(identity, shortcut=shortcut) - assert_identical(expected, actual) + grouped = expected.groupby(by, squeeze=squeeze) + actual = grouped.map(identity, shortcut=shortcut) + assert_identical(expected, actual) def 
test_groupby_sum(self): array = self.da From 9d4ee11bb6be58b00a382e6af7e7f6b781d3e80d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 10 Apr 2022 08:25:20 -0600 Subject: [PATCH 111/138] [skip-ci] Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- setup.cfg | 1 - xarray/core/groupby.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index afa25325018..dbe4bc78a35 100644 --- a/setup.cfg +++ b/setup.cfg @@ -98,7 +98,6 @@ accel = scipy bottleneck numbagg - numpy_groupies flox parallel = diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index da86e21c244..4c2bd41112f 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -662,7 +662,7 @@ def _flox_reduce(self, dim, **kwargs): ) if self._bins is not None: - # bins provided to dask_groupby are at full precision + # bins provided to flox are at full precision # the bin edge labels have a default precision of 3 # reassign to fix that. new_coord = [ From 4dd9e661def401105589d42bfd14a7bc3e48eb3c Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 10 Apr 2022 08:27:09 -0600 Subject: [PATCH 112/138] Update envs --- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 3 +-- ci/requirements/py39-all-but-dask.yml | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index a31188fec5b..e3cf0691014 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -10,6 +10,7 @@ dependencies: - cftime - dask-core - distributed + - flox - fsspec!=2021.7.0 - h5netcdf - h5py @@ -44,4 +45,3 @@ dependencies: - zarr - pip: - numbagg - - git+https://github.com/dcherian/flox.git diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 1ac4f87d34a..7b198c9f0ca 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -12,6 +12,7 @@ dependencies: - cftime - dask-core - distributed + - flox - fsspec!=2021.7.0 - h5netcdf - h5py @@ -25,7 +26,6 @@ dependencies: - numba - numexpr - numpy - - numpy_groupies - packaging - pandas - pint @@ -49,4 +49,3 @@ dependencies: - zarr - pip: - numbagg - - git+https://github.com/dcherian/flox.git diff --git a/ci/requirements/py39-all-but-dask.yml b/ci/requirements/py39-all-but-dask.yml index f05745ee1fa..e2488459e6b 100644 --- a/ci/requirements/py39-all-but-dask.yml +++ b/ci/requirements/py39-all-but-dask.yml @@ -24,7 +24,6 @@ dependencies: - netcdf4 - numba - numpy - - numpy_groupies - packaging - pandas - pint From 812ce333957224c8193a1ef53de8fc99ff600439 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 10 Apr 2022 08:29:24 -0600 Subject: [PATCH 113/138] [skip-ci] Apply suggestions from code review Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/util/generate_reductions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 0b51632343b..1d72af9bda8 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -55,6 +55,8 @@ def reduce( GROUPBY_PREAMBLE = """ class {obj}{cls}Reductions: + __slots__ = ("_obj",) + _obj: "{obj}" def reduce( @@ -303,6 +305,7 @@ def generate_code(self, method): extra_kwargs.append(f"numeric_only={method.numeric_only},") # numpy_groupies & flox do not support median + # 
https://github.com/ml31415/numpy-groupies/issues/43 if method.name == "median": indent = 12 else: From 158314abfcdfbfc2c69bc174f7ae1b6acb78f106 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Apr 2022 14:31:00 +0000 Subject: [PATCH 114/138] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/util/generate_reductions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 1d72af9bda8..1e8ee4b83b0 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -56,7 +56,7 @@ def reduce( class {obj}{cls}Reductions: __slots__ = ("_obj",) - + _obj: "{obj}" def reduce( From 3580ae3ca1b7e93b9ec83636b7d9b5ab678e6bd9 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 10 Apr 2022 08:31:20 -0600 Subject: [PATCH 115/138] fix --- xarray/util/generate_reductions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 1d72af9bda8..0d420511c44 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -55,8 +55,6 @@ def reduce( GROUPBY_PREAMBLE = """ class {obj}{cls}Reductions: - __slots__ = ("_obj",) - _obj: "{obj}" def reduce( From d613779646d8ec77f852eb0558fbdc3bb474874b Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 10 Apr 2022 08:41:09 -0600 Subject: [PATCH 116/138] fix --- xarray/core/groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 4c2bd41112f..8a1530bad20 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -634,7 +634,8 @@ def _flox_reduce(self, dim, **kwargs): # TODO: handle bins=N in flox if self._bins is not None: - expected_groups = (self._bins,) + # TODO: fix this; When binning by time, self._bins is a DatetimeIndex + expected_groups = (np.array(self._bins),) isbin = (True,) # This is an annoying hack. Xarray returns np.nan # when there are no observations in a bin, instead of 0. 
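The ``expected_groups``/``isbin`` branch touched above only runs for binned group-bys, where the GroupBy object carries the bin edges as ``self._bins`` (a ``DatetimeIndex`` when binning by time, per the TODO above, hence the ``np.array`` coercion before handing the edges to flox). A minimal sketch of the kind of call that reaches this branch (hypothetical data, not from the test suite):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6.0), dims="x", coords={"x": np.arange(6.0)})

    # Binned group-bys carry their bin edges, so this reduction goes through the
    # expected_groups/isbin code path whenever the flox-backed implementation is active.
    binned_mean = da.groupby_bins("x", bins=[0, 2, 4, 6]).mean()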
From d0a412a0e7fdfdc2f8cc02dfcf6279ededfea586 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 13 Apr 2022 11:36:40 -0600 Subject: [PATCH 117/138] Update ci/requirements/environment-windows.yml --- ci/requirements/environment-windows.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index e3cf0691014..634140fe84b 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -23,7 +23,6 @@ dependencies: - netcdf4 - numba - numpy - - numpy_groupies - packaging - pandas - pint From 3a7052e66f3d287247759733a29a7118ea5c4f22 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 23 Apr 2022 18:47:30 -0600 Subject: [PATCH 118/138] Support numeric_only --- xarray/core/groupby.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 8a1530bad20..58987f47026 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -585,8 +585,13 @@ def _flox_reduce(self, dim, **kwargs): from .dataarray import DataArray from .dataset import Dataset - # TODO: fix this - kwargs.pop("numeric_only", None) + numeric_only = kwargs.pop("numeric_only", None) + drop_vars = [] + if numeric_only: + assert isinstance(self._obj, Dataset) + for name, var in self._obj.data_vars.items(): + if not (np.issubdtype(var.dtype, np.number) or (var.dtype == np.bool_)): + drop_vars.append(name) # weird backcompat # reducing along a unique indexed dimension with squeeze=True @@ -594,12 +599,10 @@ def _flox_reduce(self, dim, **kwargs): if ( dim is None or dim == self._group.name ) and self._group.name in self._obj.xindexes: - # TODO: switch to xindexes after we can use is_unique index = self._obj.indexes[self._group.name] if index.is_unique and self._squeeze: raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") - # TODO: only do this for resample, not general groupers... 
# this creates a label DataArray since resample doesn't do that somehow if isinstance(self._group_indices[0], slice): repeats = [] @@ -654,7 +657,7 @@ def _flox_reduce(self, dim, **kwargs): isbin = False result = xarray_reduce( - self._original_obj, + self._original_obj.drop_vars(drop_vars), group, dim=dim, expected_groups=expected_groups, From eae37e2fd629abe2558f230b2f496b49cfd4c8a6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 25 Apr 2022 20:45:44 -0600 Subject: [PATCH 119/138] Properly support numeric_only --- xarray/core/groupby.py | 56 +++++++++++++++++++++++++----------- xarray/tests/test_groupby.py | 17 ++++++++--- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 58987f47026..3da8ed848d4 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -580,26 +580,31 @@ def _maybe_unstack(self, obj): return obj def _flox_reduce(self, dim, **kwargs): + """Adaptor function that translates our groupby API to that of flox.""" from flox.xarray import xarray_reduce from .dataarray import DataArray from .dataset import Dataset + obj = self._original_obj + numeric_only = kwargs.pop("numeric_only", None) - drop_vars = [] if numeric_only: - assert isinstance(self._obj, Dataset) - for name, var in self._obj.data_vars.items(): - if not (np.issubdtype(var.dtype, np.number) or (var.dtype == np.bool_)): - drop_vars.append(name) + non_numeric = { + name: var + for name, var in obj.data_vars.items() + if not (np.issubdtype(var.dtype, np.number) or (var.dtype == np.bool_)) + } + else: + non_numeric = {} # weird backcompat # reducing along a unique indexed dimension with squeeze=True # should raise an error if ( dim is None or dim == self._group.name - ) and self._group.name in self._obj.xindexes: - index = self._obj.indexes[self._group.name] + ) and self._group.name in obj.xindexes: + index = obj.indexes[self._group.name] if index.is_unique and self._squeeze: raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") @@ -623,17 +628,23 @@ def _flox_reduce(self, dim, **kwargs): else: group = self._unstacked_group - # Do this so we raise the same error message whether flox is present or not. - # Better to control it here than in flox. + unindexed_dims = tuple() if isinstance(group, str): + if group in obj.dims and group not in obj._indexes and self._bins is None: + unindexed_dims = (group,) group = self._original_obj[group] - if dim not in (None, Ellipsis): - if isinstance(dim, str): - dim = (dim,) - if any( - d not in group.dims and d not in self._original_obj.dims for d in dim - ): - raise ValueError(f"cannot reduce over dimensions {dim}.") + + if isinstance(dim, str): + dim = (dim,) + elif dim is None: + dim = group.dims + elif dim is Ellipsis: + dim = tuple(self._original_obj.dims) + + # Do this so we raise the same error message whether flox is present or not. + # Better to control it here than in flox. 
+ if any(d not in group.dims and d not in self._original_obj.dims for d in dim): + raise ValueError(f"cannot reduce over dimensions {dim}.") # TODO: handle bins=N in flox if self._bins is not None: @@ -657,7 +668,7 @@ def _flox_reduce(self, dim, **kwargs): isbin = False result = xarray_reduce( - self._original_obj.drop_vars(drop_vars), + self._original_obj.drop_vars(non_numeric), group, dim=dim, expected_groups=expected_groups, @@ -665,6 +676,17 @@ def _flox_reduce(self, dim, **kwargs): **kwargs, ) + # Ignore error when the groupby reduction is effectively + # a reduction of the underlying dataset + result = result.drop_vars(unindexed_dims, errors="ignore") + + # broadcast and restore non-numeric data variables (backcompat) + for name, var in non_numeric.items(): + if all(d not in var.dims for d in dim): + result[name] = var.variable.set_dims( + (group.name,) + var.dims, (result.sizes[group.name],) + var.shape + ) + if self._bins is not None: # bins provided to flox are at full precision # the bin edge labels have a default precision of 3 diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 0029d0401e2..8c745dc640d 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -25,7 +25,10 @@ @pytest.fixture def dataset(): ds = xr.Dataset( - {"foo": (("x", "y", "z"), np.random.randn(3, 4, 2))}, + { + "foo": (("x", "y", "z"), np.random.randn(3, 4, 2)), + "baz": ("x", ["e", "f", "g"]), + }, {"x": ["a", "b", "c"], "y": [1, 2, 3, 4], "z": [1, 2]}, ) ds["boo"] = (("z", "y"), [["f", "g", "h", "j"]] * 2) @@ -72,6 +75,15 @@ def test_multi_index_groupby_map(dataset) -> None: assert_equal(expected, actual) +def test_reduce_numeric_only(dataset) -> None: + gb = dataset.groupby("x", squeeze=False) + with xr.set_options(use_flox=False): + expected = gb.sum() + with xr.set_options(use_flox=True): + actual = gb.sum() + assert_identical(expected, actual) + + def test_multi_index_groupby_sum() -> None: # regression test for GH873 ds = xr.Dataset( @@ -1977,6 +1989,3 @@ def func(arg1, arg2, arg3=0.0): expected = xr.Dataset({"foo": ("time", [3.0, 3.0, 3.0]), "time": times}) actual = ds.resample(time="D").map(func, args=(1.0,), arg3=1.0) assert_identical(expected, actual) - - -# TODO: move other groupby tests from test_dataset and test_dataarray over here From 5583e342cc187d6f14bf43cc36c53d9916345e58 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 25 Apr 2022 20:50:03 -0600 Subject: [PATCH 120/138] Set default to "split-reduce" to reduce surprises --- xarray/core/groupby.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 3da8ed848d4..eae5edb5a65 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -588,6 +588,11 @@ def _flox_reduce(self, dim, **kwargs): obj = self._original_obj + # preserve current strategy (approximately) for dask groupby. 
+ # We want to control the default anyway to prevent surprises + # if flox decides to change its default + kwargs.setdefault("method", "split-reduce") + numeric_only = kwargs.pop("numeric_only", None) if numeric_only: non_numeric = { From 2d1de0fcd08fe135bb721ee8382a0d54282529f2 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 2 May 2022 14:38:06 -0600 Subject: [PATCH 121/138] Add flox to min_all_deps --- ci/requirements/min-all-deps.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 76e2b28093d..9e19f500860 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -18,6 +18,7 @@ dependencies: - coveralls - dask-core=2.30 - distributed=2.30 + - flox=0.5.0 - h5netcdf=0.8 - h5py=2.10 # hdf5 1.12 conflicts with h5py=2.10 From 7d9b4709353e1562b44cd299a5e8b62913bd9ea1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 3 May 2022 08:50:22 -0600 Subject: [PATCH 122/138] Update ci/requirements/min-all-deps.yml --- ci/requirements/min-all-deps.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 9e19f500860..3afc0c1d434 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -18,7 +18,7 @@ dependencies: - coveralls - dask-core=2.30 - distributed=2.30 - - flox=0.5.0 + - flox=0.5 - h5netcdf=0.8 - h5py=2.10 # hdf5 1.12 conflicts with h5py=2.10 From 7dab7308d330e1ca833038f89479959fbff2426e Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 3 May 2022 09:13:24 -0600 Subject: [PATCH 123/138] [skip-ci] add whats-new --- doc/whats-new.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4882402073c..6dda6997ba3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -107,6 +107,10 @@ Performance - GroupBy binary operations are now vectorized. Previously this involved looping over all groups. (:issue:`5804`,:pull:`6160`) By `Deepak Cherian `_. +- Substantially improved GroupBy operations using `flox `_. + This is auto-enabled when ``flox`` is installed. Use ``xr.set_options(use_flox=False)`` to use + the old algorithm. (:issue:`4473`, :issue:`4498`, :issue:`659`, :issue:`2237`, :pull:`271`). + By `Deepak Cherian `_ and `Anderson Banihirwe `_ Internal Changes ~~~~~~~~~~~~~~~~ From 6902de318e15f88d9dd33479a2e061ad318c3ef0 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 3 May 2022 09:18:25 -0600 Subject: [PATCH 124/138] Better defaults for resample --- xarray/core/groupby.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index eae5edb5a65..095124202c5 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -588,10 +588,16 @@ def _flox_reduce(self, dim, **kwargs): obj = self._original_obj + # TODO: could be better? + is_resample = isinstance(self._group_indices[0], slice) + # preserve current strategy (approximately) for dask groupby. 
# We want to control the default anyway to prevent surprises # if flox decides to change its default - kwargs.setdefault("method", "split-reduce") + if is_resample: + kwargs.setdefault("method", "cohorts") + else: + kwargs.setdefault("method", "split-reduce") numeric_only = kwargs.pop("numeric_only", None) if numeric_only: @@ -614,7 +620,7 @@ def _flox_reduce(self, dim, **kwargs): raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") # this creates a label DataArray since resample doesn't do that somehow - if isinstance(self._group_indices[0], slice): + if is_resample: repeats = [] for slicer in self._group_indices: stop = ( From b2b3001255bf440e9f2907297d091ae4b02dbc28 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 3 May 2022 09:58:01 -0600 Subject: [PATCH 125/138] [skip-ci] Fix whats-new. --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6dda6997ba3..41315274234 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -110,7 +110,7 @@ Performance - Substantially improved GroupBy operations using `flox `_. This is auto-enabled when ``flox`` is installed. Use ``xr.set_options(use_flox=False)`` to use the old algorithm. (:issue:`4473`, :issue:`4498`, :issue:`659`, :issue:`2237`, :pull:`271`). - By `Deepak Cherian `_ and `Anderson Banihirwe `_ + By `Deepak Cherian `_ and `Anderson Banihirwe `_. Internal Changes ~~~~~~~~~~~~~~~~ From 36c206ede789c1567f6d4fd7baa0746c6877aae1 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 4 May 2022 09:15:40 -0600 Subject: [PATCH 126/138] Clean up resampling. Add Resample._flox_reduce. Change inheritance order to make things work. --- xarray/core/_reductions.py | 38 ------------------------------ xarray/core/groupby.py | 32 ++++--------------------- xarray/core/resample.py | 29 +++++++++++++++++++++-- xarray/util/generate_reductions.py | 9 +++++-- 4 files changed, 38 insertions(+), 70 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 7df2fc16746..08bf1c14907 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -3124,25 +3124,6 @@ def median( class DatasetResampleReductions: _obj: "Dataset" - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "Dataset": - raise NotImplementedError() - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "Dataset": - raise NotImplementedError() - def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, @@ -5372,25 +5353,6 @@ def median( class DataArrayResampleReductions: _obj: "DataArray" - def reduce( - self, - func: Callable[..., Any], - dim: Union[None, Hashable, Sequence[Hashable]] = None, - *, - axis: Union[None, int, Sequence[int]] = None, - keep_attrs: bool = None, - keepdims: bool = False, - **kwargs: Any, - ) -> "DataArray": - raise NotImplementedError() - - def _flox_reduce( - self, - dim: Union[None, Hashable, Sequence[Hashable]], - **kwargs, - ) -> "DataArray": - raise NotImplementedError() - def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 095124202c5..450db3542c3 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -583,21 +583,14 @@ def _flox_reduce(self, dim, **kwargs): """Adaptor function that translates 
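The reordering matters because Python's MRO decides which ``_flox_reduce`` the generated reduction methods see: the intent here is that the resample-specific wrapper (added below) is found before the plain groupby implementation and defers to it via ``super()``. A toy sketch of that lookup, with invented class names standing in for the real mixins:

    class GroupByBase:
        def _flox_reduce(self, dim, **kwargs):
            return "groupby reduction"

    class ResampleReductions:
        # stands in for the generated reduction methods, which call _flox_reduce
        def count(self, dim=None):
            return self._flox_reduce(dim)

    class Resample:
        def _flox_reduce(self, dim, **kwargs):
            # resample-specific preprocessing, then defer to the groupby machinery
            return "resample wrapper -> " + super()._flox_reduce(dim, **kwargs)

    class DataArrayResample(ResampleReductions, Resample, GroupByBase):
        pass

    # MRO: DataArrayResample -> ResampleReductions -> Resample -> GroupByBase,
    # so count() resolves Resample._flox_reduce, which chains to GroupByBase's.
    assert DataArrayResample().count() == "resample wrapper -> groupby reduction"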
our groupby API to that of flox.""" from flox.xarray import xarray_reduce - from .dataarray import DataArray from .dataset import Dataset obj = self._original_obj - # TODO: could be better? - is_resample = isinstance(self._group_indices[0], slice) - # preserve current strategy (approximately) for dask groupby. # We want to control the default anyway to prevent surprises # if flox decides to change its default - if is_resample: - kwargs.setdefault("method", "cohorts") - else: - kwargs.setdefault("method", "split-reduce") + kwargs.setdefault("method", "split-reduce") numeric_only = kwargs.pop("numeric_only", None) if numeric_only: @@ -619,21 +612,9 @@ def _flox_reduce(self, dim, **kwargs): if index.is_unique and self._squeeze: raise ValueError(f"cannot reduce over dimensions {self._group.name!r}") - # this creates a label DataArray since resample doesn't do that somehow - if is_resample: - repeats = [] - for slicer in self._group_indices: - stop = ( - slicer.stop - if slicer.stop is not None - else self._obj.sizes[self._group_dim] - ) - repeats.append(stop - slicer.start) - labels = np.repeat(self._unique_coord.data, repeats) - group = DataArray( - labels, dims=(self._group_dim,), name=self._unique_coord.name - ) - else: + # group is only passed by resample + group = kwargs.pop("group", None) + if group is None: if isinstance(self._unstacked_group, _DummyGroup): group = self._unstacked_group.name else: @@ -657,7 +638,6 @@ def _flox_reduce(self, dim, **kwargs): if any(d not in group.dims and d not in self._original_obj.dims for d in dim): raise ValueError(f"cannot reduce over dimensions {dim}.") - # TODO: handle bins=N in flox if self._bins is not None: # TODO: fix this; When binning by time, self._bins is a DatetimeIndex expected_groups = (np.array(self._bins),) @@ -712,10 +692,6 @@ def _flox_reduce(self, dim, **kwargs): if isinstance(self._obj, Dataset) and self._group_dim in self._obj.dims: result = result.transpose(self._group.name, ...) - if self._unique_coord.name == "__resample_dim__": - result = self._maybe_restore_empty_groups(result) - # TODO: make this cleaner; the renaming happens in DatasetResample.map - result = result.rename(dict(__resample_dim__=self._group_dim)) return result def fillna(self, value): diff --git a/xarray/core/resample.py b/xarray/core/resample.py index ed665ad4048..524f5257deb 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,6 +1,8 @@ import warnings from typing import Any, Callable, Hashable, Sequence, Union +import numpy as np + from ._reductions import DataArrayResampleReductions, DatasetResampleReductions from .groupby import DataArrayGroupByBase, DatasetGroupByBase @@ -21,6 +23,29 @@ class Resample: """ + def _flox_reduce(self, dim, **kwargs): + + from .dataarray import DataArray + + kwargs.setdefault("method", "cohorts") + + # now create a label DataArray since resample doesn't do that somehow + repeats = [] + for slicer in self._group_indices: + stop = ( + slicer.stop + if slicer.stop is not None + else self._obj.sizes[self._group_dim] + ) + repeats.append(stop - slicer.start) + labels = np.repeat(self._unique_coord.data, repeats) + group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) + + result = super()._flox_reduce(dim=dim, group=group, **kwargs) + result = self._maybe_restore_empty_groups(result) + result = result.rename({"__resample_dim__": self._group_dim}) + return result + def _upsample(self, method, *args, **kwargs): """Dispatch function to call appropriate up-sampling methods on data. 
@@ -158,7 +183,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): +class DataArrayResample(DataArrayResampleReductions, Resample, DataArrayGroupByBase): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -249,7 +274,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): +class DatasetResample(DatasetResampleReductions, Resample, DatasetGroupByBase): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 0d420511c44..5502bd86032 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -76,6 +76,11 @@ def _flox_reduce( ) -> "{obj}": raise NotImplementedError()""" +RESAMPLE_PREAMBLE = ''' + +class {obj}{cls}Reductions: + _obj: "{obj}"''' + TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, @@ -437,7 +442,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', - definition_preamble=GROUPBY_PREAMBLE, + definition_preamble=RESAMPLE_PREAMBLE, ) DATASET_GROUPBY_GENERATOR = GroupByReductionGenerator( cls="GroupBy", @@ -455,7 +460,7 @@ class DataStructure: docref="resampling", docref_description="resampling operations", example_call_preamble='.resample(time="3M")', - definition_preamble=GROUPBY_PREAMBLE, + definition_preamble=RESAMPLE_PREAMBLE, ) From 7a58590561fef1fc1714c71443b61b4553bdfded Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 20:02:30 +0200 Subject: [PATCH 127/138] Test adding back dummy methods. 
--- xarray/core/_reductions.py | 19 +++++++++++++++++++ xarray/util/generate_reductions.py | 24 ++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 08bf1c14907..d1beb19bd44 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -5353,6 +5353,25 @@ def median( class DataArrayResampleReductions: _obj: "DataArray" + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() + def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 5502bd86032..b83e8530429 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -76,10 +76,29 @@ def _flox_reduce( ) -> "{obj}": raise NotImplementedError()""" -RESAMPLE_PREAMBLE = ''' +RESAMPLE_PREAMBLE = """ class {obj}{cls}Reductions: - _obj: "{obj}"''' + _obj: "{obj}" + + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "{obj}": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "{obj}": + raise NotImplementedError()""" TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( @@ -470,6 +489,7 @@ class DataStructure: p = Path(os.getcwd()) filepath = p.parent / "xarray" / "xarray" / "core" / "_reductions.py" + # filepath = p.parent / "core" / "_reductions.py" # Run from script location with open(filepath, mode="w", encoding="utf-8") as f: f.write(MODULE_PREAMBLE + "\n") for gen in [ From 687beacc4696008489b00a68335f674b1b0719ba Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 20:19:53 +0200 Subject: [PATCH 128/138] Update _reductions.py --- xarray/core/_reductions.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index d1beb19bd44..4dbbb985241 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -3124,6 +3124,25 @@ def median( class DatasetResampleReductions: _obj: "Dataset" + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + *, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> "Dataset": + raise NotImplementedError() + + def _flox_reduce( + self, + dim: Union[None, Hashable, Sequence[Hashable]], + **kwargs, + ) -> "Dataset": + raise NotImplementedError() + def count( self, dim: Union[None, Hashable, Sequence[Hashable]] = None, From 4705b6cdc7e6d4c5f6e7c02317b605d172d45242 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 20:46:49 +0200 Subject: [PATCH 129/138] Update _reductions.py --- xarray/core/_reductions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 
4dbbb985241..7df2fc16746 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -5381,14 +5381,14 @@ def reduce( keep_attrs: bool = None, keepdims: bool = False, **kwargs: Any, - ) -> "Dataset": + ) -> "DataArray": raise NotImplementedError() def _flox_reduce( self, dim: Union[None, Hashable, Sequence[Hashable]], **kwargs, - ) -> "Dataset": + ) -> "DataArray": raise NotImplementedError() def count( From ac49bfa5307aebfb849597e5049e24a725df17ee Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 20:55:45 +0200 Subject: [PATCH 130/138] Update resample.py --- xarray/core/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 524f5257deb..813f6380d4d 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -183,7 +183,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayResampleReductions, Resample, DataArrayGroupByBase): +class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -274,7 +274,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleReductions, Resample, DatasetGroupByBase): +class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): From 444feee5c0176390f4e0bf95a32decf179e5d9f2 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 22:32:19 +0200 Subject: [PATCH 131/138] Try subclassing to ResampleBase-classes Resample assumes having self._group_dim but that was never defined earlier, mypy should complain about this if the class was typed. --- xarray/core/resample.py | 336 ++++++++++++++++++++-------------------- 1 file changed, 170 insertions(+), 166 deletions(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 813f6380d4d..c6264a119a9 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -8,182 +8,186 @@ RESAMPLE_DIM = "__resample_dim__" +_resample_classes = [None, None] +for i, GroupByBase in enumerate((DataArrayGroupByBase, DatasetGroupByBase)): + class _Resample(GroupByBase): + """An object that extends the `GroupBy` object with additional logic + for handling specialized re-sampling operations. -class Resample: - """An object that extends the `GroupBy` object with additional logic - for handling specialized re-sampling operations. - - You should create a `Resample` object by using the `DataArray.resample` or - `Dataset.resample` methods. 
The dimension along re-sampling - - See Also - -------- - DataArray.resample - Dataset.resample - - """ - - def _flox_reduce(self, dim, **kwargs): - - from .dataarray import DataArray - - kwargs.setdefault("method", "cohorts") - - # now create a label DataArray since resample doesn't do that somehow - repeats = [] - for slicer in self._group_indices: - stop = ( - slicer.stop - if slicer.stop is not None - else self._obj.sizes[self._group_dim] - ) - repeats.append(stop - slicer.start) - labels = np.repeat(self._unique_coord.data, repeats) - group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) - - result = super()._flox_reduce(dim=dim, group=group, **kwargs) - result = self._maybe_restore_empty_groups(result) - result = result.rename({"__resample_dim__": self._group_dim}) - return result - - def _upsample(self, method, *args, **kwargs): - """Dispatch function to call appropriate up-sampling methods on - data. - - This method should not be called directly; instead, use one of the - wrapper functions supplied by `Resample`. - - Parameters - ---------- - method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ - "interpolate"} - Method to use for up-sampling + You should create a `Resample` object by using the `DataArray.resample` or + `Dataset.resample` methods. The dimension along re-sampling See Also -------- - Resample.asfreq - Resample.pad - Resample.backfill - Resample.interpolate + DataArray.resample + Dataset.resample """ - upsampled_index = self._full_index - - # Drop non-dimension coordinates along the resampled dimension - for k, v in self._obj.coords.items(): - if k == self._dim: - continue - if self._dim in v.dims: - self._obj = self._obj.drop_vars(k) - - if method == "asfreq": - return self.mean(self._dim) - - elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: - kwargs = kwargs.copy() - kwargs.update(**{self._dim: upsampled_index}) - return self._obj.reindex(method=method, *args, **kwargs) - - elif method == "interpolate": - return self._interpolate(*args, **kwargs) - - else: - raise ValueError( - 'Specified method was "{}" but must be one of' - '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) + def _flox_reduce(self, dim, **kwargs): + + from .dataarray import DataArray + + kwargs.setdefault("method", "cohorts") + + # now create a label DataArray since resample doesn't do that somehow + repeats = [] + for slicer in self._group_indices: + stop = ( + slicer.stop + if slicer.stop is not None + else self._obj.sizes[self._group_dim] + ) + repeats.append(stop - slicer.start) + labels = np.repeat(self._unique_coord.data, repeats) + group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) + + result = super()._flox_reduce(dim=dim, group=group, **kwargs) + result = self._maybe_restore_empty_groups(result) + result = result.rename({"__resample_dim__": self._group_dim}) + return result + + def _upsample(self, method, *args, **kwargs): + """Dispatch function to call appropriate up-sampling methods on + data. + + This method should not be called directly; instead, use one of the + wrapper functions supplied by `Resample`. 
+ + Parameters + ---------- + method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ + "interpolate"} + Method to use for up-sampling + + See Also + -------- + Resample.asfreq + Resample.pad + Resample.backfill + Resample.interpolate + + """ + + upsampled_index = self._full_index + + # Drop non-dimension coordinates along the resampled dimension + for k, v in self._obj.coords.items(): + if k == self._dim: + continue + if self._dim in v.dims: + self._obj = self._obj.drop_vars(k) + + if method == "asfreq": + return self.mean(self._dim) + + elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: + kwargs = kwargs.copy() + kwargs.update(**{self._dim: upsampled_index}) + return self._obj.reindex(method=method, *args, **kwargs) + + elif method == "interpolate": + return self._interpolate(*args, **kwargs) + + else: + raise ValueError( + 'Specified method was "{}" but must be one of' + '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) + ) + + def asfreq(self): + """Return values of original object at the new up-sampling frequency; + essentially a re-index with new times set to NaN. + """ + return self._upsample("asfreq") + + def pad(self, tolerance=None): + """Forward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("pad", tolerance=tolerance) + + ffill = pad + + def backfill(self, tolerance=None): + """Backward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("backfill", tolerance=tolerance) + + bfill = backfill + + def nearest(self, tolerance=None): + """Take new values from nearest original coordinate to up-sampled + frequency coordinates. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("nearest", tolerance=tolerance) + + def interpolate(self, kind="linear"): + """Interpolate up-sampled data using the original data + as knots. 
+ + Parameters + ---------- + kind : {"linear", "nearest", "zero", "slinear", \ + "quadratic", "cubic"}, default: "linear" + Interpolation scheme to use + + See Also + -------- + scipy.interpolate.interp1d + + """ + return self._interpolate(kind=kind) + + def _interpolate(self, kind="linear"): + """Apply scipy.interpolate.interp1d along resampling dimension.""" + # drop any existing non-dimension coordinates along the resampling + # dimension + dummy = self._obj.copy() + for k, v in self._obj.coords.items(): + if k != self._dim and self._dim in v.dims: + dummy = dummy.drop_vars(k) + return dummy.interp( + assume_sorted=True, + method=kind, + kwargs={"bounds_error": False}, + **{self._dim: self._full_index}, ) - def asfreq(self): - """Return values of original object at the new up-sampling frequency; - essentially a re-index with new times set to NaN. - """ - return self._upsample("asfreq") - - def pad(self, tolerance=None): - """Forward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("pad", tolerance=tolerance) - - ffill = pad - - def backfill(self, tolerance=None): - """Backward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("backfill", tolerance=tolerance) - - bfill = backfill - - def nearest(self, tolerance=None): - """Take new values from nearest original coordinate to up-sampled - frequency coordinates. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("nearest", tolerance=tolerance) - - def interpolate(self, kind="linear"): - """Interpolate up-sampled data using the original data - as knots. 
- - Parameters - ---------- - kind : {"linear", "nearest", "zero", "slinear", \ - "quadratic", "cubic"}, default: "linear" - Interpolation scheme to use - - See Also - -------- - scipy.interpolate.interp1d - - """ - return self._interpolate(kind=kind) - - def _interpolate(self, kind="linear"): - """Apply scipy.interpolate.interp1d along resampling dimension.""" - # drop any existing non-dimension coordinates along the resampling - # dimension - dummy = self._obj.copy() - for k, v in self._obj.coords.items(): - if k != self._dim and self._dim in v.dims: - dummy = dummy.drop_vars(k) - return dummy.interp( - assume_sorted=True, - method=kind, - kwargs={"bounds_error": False}, - **{self._dim: self._full_index}, - ) + _resample_classes[i] = _Resample +DataArrayResampleBase, DatasetResampleBase = _resample_classes -class DataArrayResample(DataArrayGroupByBase, DataArrayResampleReductions, Resample): +class DataArrayResample(DataArrayResampleBase, DataArrayResampleReductions): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -274,7 +278,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetGroupByBase, DatasetResampleReductions, Resample): +class DatasetResample(DatasetResampleBase, DatasetResampleReductions): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): From 4f7ef6d1c94fb431b335d07e6cfd092d3b1a9957 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 20:36:23 +0000 Subject: [PATCH 132/138] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/resample.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c6264a119a9..61fce2e19cd 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -10,6 +10,7 @@ _resample_classes = [None, None] for i, GroupByBase in enumerate((DataArrayGroupByBase, DatasetGroupByBase)): + class _Resample(GroupByBase): """An object that extends the `GroupBy` object with additional logic for handling specialized re-sampling operations. 
@@ -40,7 +41,9 @@ def _flox_reduce(self, dim, **kwargs): ) repeats.append(stop - slicer.start) labels = np.repeat(self._unique_coord.data, repeats) - group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) + group = DataArray( + labels, dims=(self._group_dim,), name=self._unique_coord.name + ) result = super()._flox_reduce(dim=dim, group=group, **kwargs) result = self._maybe_restore_empty_groups(result) @@ -187,6 +190,7 @@ def _interpolate(self, kind="linear"): DataArrayResampleBase, DatasetResampleBase = _resample_classes + class DataArrayResample(DataArrayResampleBase, DataArrayResampleReductions): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension From 32828090d0c031e3ad0b13ff7e04832f61265749 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 23:09:01 +0200 Subject: [PATCH 133/138] Copy/paste instead of a for loop Hopefully mypy will be satisfied --- xarray/core/resample.py | 505 +++++++++++++++++++++++++++------------- 1 file changed, 337 insertions(+), 168 deletions(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c6264a119a9..ab918a496f3 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -8,184 +8,353 @@ RESAMPLE_DIM = "__resample_dim__" -_resample_classes = [None, None] -for i, GroupByBase in enumerate((DataArrayGroupByBase, DatasetGroupByBase)): - class _Resample(GroupByBase): - """An object that extends the `GroupBy` object with additional logic - for handling specialized re-sampling operations. - You should create a `Resample` object by using the `DataArray.resample` or - `Dataset.resample` methods. The dimension along re-sampling +class DataArrayResampleNase(DataArrayGroupByBase): + """An object that extends the `GroupBy` object with additional logic + for handling specialized re-sampling operations. + + You should create a `Resample` object by using the `DataArray.resample` or + `Dataset.resample` methods. The dimension along re-sampling + + See Also + -------- + DataArray.resample + Dataset.resample + + """ + + def _flox_reduce(self, dim, **kwargs): + + from .dataarray import DataArray + + kwargs.setdefault("method", "cohorts") + + # now create a label DataArray since resample doesn't do that somehow + repeats = [] + for slicer in self._group_indices: + stop = ( + slicer.stop + if slicer.stop is not None + else self._obj.sizes[self._group_dim] + ) + repeats.append(stop - slicer.start) + labels = np.repeat(self._unique_coord.data, repeats) + group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) + + result = super()._flox_reduce(dim=dim, group=group, **kwargs) + result = self._maybe_restore_empty_groups(result) + result = result.rename({RESAMPLE_DIM: self._group_dim}) + return result + + def _upsample(self, method, *args, **kwargs): + """Dispatch function to call appropriate up-sampling methods on + data. + + This method should not be called directly; instead, use one of the + wrapper functions supplied by `Resample`. 
+ + Parameters + ---------- + method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ + "interpolate"} + Method to use for up-sampling + + See Also + -------- + Resample.asfreq + Resample.pad + Resample.backfill + Resample.interpolate + + """ + + upsampled_index = self._full_index + + # Drop non-dimension coordinates along the resampled dimension + for k, v in self._obj.coords.items(): + if k == self._dim: + continue + if self._dim in v.dims: + self._obj = self._obj.drop_vars(k) + + if method == "asfreq": + return self.mean(self._dim) + + elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: + kwargs = kwargs.copy() + kwargs.update(**{self._dim: upsampled_index}) + return self._obj.reindex(method=method, *args, **kwargs) + + elif method == "interpolate": + return self._interpolate(*args, **kwargs) + + else: + raise ValueError( + 'Specified method was "{}" but must be one of' + '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) + ) + + def asfreq(self): + """Return values of original object at the new up-sampling frequency; + essentially a re-index with new times set to NaN. + """ + return self._upsample("asfreq") + + def pad(self, tolerance=None): + """Forward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("pad", tolerance=tolerance) + + ffill = pad + + def backfill(self, tolerance=None): + """Backward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("backfill", tolerance=tolerance) + + bfill = backfill + + def nearest(self, tolerance=None): + """Take new values from nearest original coordinate to up-sampled + frequency coordinates. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("nearest", tolerance=tolerance) + + def interpolate(self, kind="linear"): + """Interpolate up-sampled data using the original data + as knots. 
+ + Parameters + ---------- + kind : {"linear", "nearest", "zero", "slinear", \ + "quadratic", "cubic"}, default: "linear" + Interpolation scheme to use + + See Also + -------- + scipy.interpolate.interp1d + + """ + return self._interpolate(kind=kind) + + def _interpolate(self, kind="linear"): + """Apply scipy.interpolate.interp1d along resampling dimension.""" + # drop any existing non-dimension coordinates along the resampling + # dimension + dummy = self._obj.copy() + for k, v in self._obj.coords.items(): + if k != self._dim and self._dim in v.dims: + dummy = dummy.drop_vars(k) + return dummy.interp( + assume_sorted=True, + method=kind, + kwargs={"bounds_error": False}, + **{self._dim: self._full_index}, + ) + +# Copy/pasted from above to make mypy happy: +class DatasetResampleNase(DatasetGroupByBase): + """An object that extends the `GroupBy` object with additional logic + for handling specialized re-sampling operations. + + You should create a `Resample` object by using the `DataArray.resample` or + `Dataset.resample` methods. The dimension along re-sampling + + See Also + -------- + DataArray.resample + Dataset.resample + + """ + + def _flox_reduce(self, dim, **kwargs): + + from .dataarray import DataArray + + kwargs.setdefault("method", "cohorts") + + # now create a label DataArray since resample doesn't do that somehow + repeats = [] + for slicer in self._group_indices: + stop = ( + slicer.stop + if slicer.stop is not None + else self._obj.sizes[self._group_dim] + ) + repeats.append(stop - slicer.start) + labels = np.repeat(self._unique_coord.data, repeats) + group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) + + result = super()._flox_reduce(dim=dim, group=group, **kwargs) + result = self._maybe_restore_empty_groups(result) + result = result.rename({RESAMPLE_DIM: self._group_dim}) + return result + + def _upsample(self, method, *args, **kwargs): + """Dispatch function to call appropriate up-sampling methods on + data. + + This method should not be called directly; instead, use one of the + wrapper functions supplied by `Resample`. + + Parameters + ---------- + method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ + "interpolate"} + Method to use for up-sampling See Also -------- - DataArray.resample - Dataset.resample + Resample.asfreq + Resample.pad + Resample.backfill + Resample.interpolate """ - def _flox_reduce(self, dim, **kwargs): - - from .dataarray import DataArray - - kwargs.setdefault("method", "cohorts") - - # now create a label DataArray since resample doesn't do that somehow - repeats = [] - for slicer in self._group_indices: - stop = ( - slicer.stop - if slicer.stop is not None - else self._obj.sizes[self._group_dim] - ) - repeats.append(stop - slicer.start) - labels = np.repeat(self._unique_coord.data, repeats) - group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) - - result = super()._flox_reduce(dim=dim, group=group, **kwargs) - result = self._maybe_restore_empty_groups(result) - result = result.rename({"__resample_dim__": self._group_dim}) - return result - - def _upsample(self, method, *args, **kwargs): - """Dispatch function to call appropriate up-sampling methods on - data. - - This method should not be called directly; instead, use one of the - wrapper functions supplied by `Resample`. 
- - Parameters - ---------- - method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ - "interpolate"} - Method to use for up-sampling - - See Also - -------- - Resample.asfreq - Resample.pad - Resample.backfill - Resample.interpolate - - """ - - upsampled_index = self._full_index - - # Drop non-dimension coordinates along the resampled dimension - for k, v in self._obj.coords.items(): - if k == self._dim: - continue - if self._dim in v.dims: - self._obj = self._obj.drop_vars(k) - - if method == "asfreq": - return self.mean(self._dim) - - elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: - kwargs = kwargs.copy() - kwargs.update(**{self._dim: upsampled_index}) - return self._obj.reindex(method=method, *args, **kwargs) - - elif method == "interpolate": - return self._interpolate(*args, **kwargs) - - else: - raise ValueError( - 'Specified method was "{}" but must be one of' - '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) - ) - - def asfreq(self): - """Return values of original object at the new up-sampling frequency; - essentially a re-index with new times set to NaN. - """ - return self._upsample("asfreq") - - def pad(self, tolerance=None): - """Forward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("pad", tolerance=tolerance) - - ffill = pad - - def backfill(self, tolerance=None): - """Backward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("backfill", tolerance=tolerance) - - bfill = backfill - - def nearest(self, tolerance=None): - """Take new values from nearest original coordinate to up-sampled - frequency coordinates. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("nearest", tolerance=tolerance) - - def interpolate(self, kind="linear"): - """Interpolate up-sampled data using the original data - as knots. 
- - Parameters - ---------- - kind : {"linear", "nearest", "zero", "slinear", \ - "quadratic", "cubic"}, default: "linear" - Interpolation scheme to use - - See Also - -------- - scipy.interpolate.interp1d - - """ - return self._interpolate(kind=kind) - - def _interpolate(self, kind="linear"): - """Apply scipy.interpolate.interp1d along resampling dimension.""" - # drop any existing non-dimension coordinates along the resampling - # dimension - dummy = self._obj.copy() - for k, v in self._obj.coords.items(): - if k != self._dim and self._dim in v.dims: - dummy = dummy.drop_vars(k) - return dummy.interp( - assume_sorted=True, - method=kind, - kwargs={"bounds_error": False}, - **{self._dim: self._full_index}, + upsampled_index = self._full_index + + # Drop non-dimension coordinates along the resampled dimension + for k, v in self._obj.coords.items(): + if k == self._dim: + continue + if self._dim in v.dims: + self._obj = self._obj.drop_vars(k) + + if method == "asfreq": + return self.mean(self._dim) + + elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: + kwargs = kwargs.copy() + kwargs.update(**{self._dim: upsampled_index}) + return self._obj.reindex(method=method, *args, **kwargs) + + elif method == "interpolate": + return self._interpolate(*args, **kwargs) + + else: + raise ValueError( + 'Specified method was "{}" but must be one of' + '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) ) - _resample_classes[i] = _Resample + def asfreq(self): + """Return values of original object at the new up-sampling frequency; + essentially a re-index with new times set to NaN. + """ + return self._upsample("asfreq") -DataArrayResampleBase, DatasetResampleBase = _resample_classes + def pad(self, tolerance=None): + """Forward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("pad", tolerance=tolerance) + + ffill = pad + + def backfill(self, tolerance=None): + """Backward fill new values at up-sampled frequency. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("backfill", tolerance=tolerance) + + bfill = backfill + + def nearest(self, tolerance=None): + """Take new values from nearest original coordinate to up-sampled + frequency coordinates. + + Parameters + ---------- + tolerance : optional + Maximum distance between original and new labels to limit + the up-sampling method. + Up-sampled data with indices that satisfy the equation + ``abs(index[indexer] - target) <= tolerance`` are filled by + new values. Data with indices that are outside the given + tolerance are filled with ``NaN`` s + """ + return self._upsample("nearest", tolerance=tolerance) + + def interpolate(self, kind="linear"): + """Interpolate up-sampled data using the original data + as knots. 
+ + Parameters + ---------- + kind : {"linear", "nearest", "zero", "slinear", \ + "quadratic", "cubic"}, default: "linear" + Interpolation scheme to use + + See Also + -------- + scipy.interpolate.interp1d + + """ + return self._interpolate(kind=kind) + + def _interpolate(self, kind="linear"): + """Apply scipy.interpolate.interp1d along resampling dimension.""" + # drop any existing non-dimension coordinates along the resampling + # dimension + dummy = self._obj.copy() + for k, v in self._obj.coords.items(): + if k != self._dim and self._dim in v.dims: + dummy = dummy.drop_vars(k) + return dummy.interp( + assume_sorted=True, + method=kind, + kwargs={"bounds_error": False}, + **{self._dim: self._full_index}, + ) class DataArrayResample(DataArrayResampleBase, DataArrayResampleReductions): """DataArrayGroupBy object specialized to time resampling operations over a From 4a384fd06786d4dce52538a991d609b95670753f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 May 2022 21:13:23 +0000 Subject: [PATCH 134/138] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/resample.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 31fb89dcc96..a6f7500f09d 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -182,6 +182,7 @@ def _interpolate(self, kind="linear"): **{self._dim: self._full_index}, ) + # Copy/pasted from above to make mypy happy: class DatasetResampleNase(DatasetGroupByBase): """An object that extends the `GroupBy` object with additional logic From c38ef78687a63ad6d7211c0a9a70f1fdd25ab528 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Thu, 5 May 2022 23:21:09 +0200 Subject: [PATCH 135/138] Update resample.py --- xarray/core/resample.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 31fb89dcc96..bb1db5300ac 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -9,7 +9,7 @@ RESAMPLE_DIM = "__resample_dim__" -class DataArrayResampleNase(DataArrayGroupByBase): +class DataArrayResampleBase(DataArrayGroupByBase): """An object that extends the `GroupBy` object with additional logic for handling specialized re-sampling operations. @@ -183,7 +183,7 @@ def _interpolate(self, kind="linear"): ) # Copy/pasted from above to make mypy happy: -class DatasetResampleNase(DatasetGroupByBase): +class DatasetResampleBase(DatasetGroupByBase): """An object that extends the `GroupBy` object with additional logic for handling specialized re-sampling operations. 
From 67cda8a552c647795a5480ced99d4f8f14cb3518 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Mon, 9 May 2022 22:16:50 +0200 Subject: [PATCH 136/138] Ignore typing when flox is not available --- xarray/core/_reductions.py | 2 +- xarray/util/generate_reductions.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 7df2fc16746..d782363760a 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -14,7 +14,7 @@ try: import flox except ImportError: - flox = None + flox = None # type: ignore class DatasetReductions: diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index b83e8530429..96b91c16906 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -33,7 +33,7 @@ try: import flox except ImportError: - flox = None''' + flox = None # type: ignore''' DEFAULT_PREAMBLE = """ From fd20ba2d7447331bed47030fa80bf20735835a1f Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 10 May 2022 11:54:00 -0600 Subject: [PATCH 137/138] Update whats-new --- doc/whats-new.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1d9cf2a7c00..2fb92a5e468 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -137,7 +137,8 @@ Performance - Substantially improved GroupBy operations using `flox `_. This is auto-enabled when ``flox`` is installed. Use ``xr.set_options(use_flox=False)`` to use the old algorithm. (:issue:`4473`, :issue:`4498`, :issue:`659`, :issue:`2237`, :pull:`271`). - By `Deepak Cherian `_ and `Anderson Banihirwe `_. + By `Deepak Cherian `_,`Anderson Banihirwe `_, + `Jimmy Westling `_. Internal Changes ~~~~~~~~~~~~~~~~ From ad33d85123dc28a374b1dfee41a2a35aba4a654c Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 10 May 2022 13:20:02 -0600 Subject: [PATCH 138/138] Deduplicate --- xarray/core/resample.py | 183 +--------------------------------------- 1 file changed, 4 insertions(+), 179 deletions(-) diff --git a/xarray/core/resample.py b/xarray/core/resample.py index cb11387e8cb..bcc4bfb90cd 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -4,12 +4,12 @@ import numpy as np from ._reductions import DataArrayResampleReductions, DatasetResampleReductions -from .groupby import DataArrayGroupByBase, DatasetGroupByBase +from .groupby import DataArrayGroupByBase, DatasetGroupByBase, GroupBy RESAMPLE_DIM = "__resample_dim__" -class DataArrayResampleBase(DataArrayGroupByBase): +class Resample(GroupBy): """An object that extends the `GroupBy` object with additional logic for handling specialized re-sampling operations. @@ -183,182 +183,7 @@ def _interpolate(self, kind="linear"): ) -# Copy/pasted from above to make mypy happy: -class DatasetResampleBase(DatasetGroupByBase): - """An object that extends the `GroupBy` object with additional logic - for handling specialized re-sampling operations. - - You should create a `Resample` object by using the `DataArray.resample` or - `Dataset.resample` methods. 
The dimension along re-sampling - - See Also - -------- - DataArray.resample - Dataset.resample - - """ - - def _flox_reduce(self, dim, **kwargs): - - from .dataarray import DataArray - - kwargs.setdefault("method", "cohorts") - - # now create a label DataArray since resample doesn't do that somehow - repeats = [] - for slicer in self._group_indices: - stop = ( - slicer.stop - if slicer.stop is not None - else self._obj.sizes[self._group_dim] - ) - repeats.append(stop - slicer.start) - labels = np.repeat(self._unique_coord.data, repeats) - group = DataArray(labels, dims=(self._group_dim,), name=self._unique_coord.name) - - result = super()._flox_reduce(dim=dim, group=group, **kwargs) - result = self._maybe_restore_empty_groups(result) - result = result.rename({RESAMPLE_DIM: self._group_dim}) - return result - - def _upsample(self, method, *args, **kwargs): - """Dispatch function to call appropriate up-sampling methods on - data. - - This method should not be called directly; instead, use one of the - wrapper functions supplied by `Resample`. - - Parameters - ---------- - method : {"asfreq", "pad", "ffill", "backfill", "bfill", "nearest", \ - "interpolate"} - Method to use for up-sampling - - See Also - -------- - Resample.asfreq - Resample.pad - Resample.backfill - Resample.interpolate - - """ - - upsampled_index = self._full_index - - # Drop non-dimension coordinates along the resampled dimension - for k, v in self._obj.coords.items(): - if k == self._dim: - continue - if self._dim in v.dims: - self._obj = self._obj.drop_vars(k) - - if method == "asfreq": - return self.mean(self._dim) - - elif method in ["pad", "ffill", "backfill", "bfill", "nearest"]: - kwargs = kwargs.copy() - kwargs.update(**{self._dim: upsampled_index}) - return self._obj.reindex(method=method, *args, **kwargs) - - elif method == "interpolate": - return self._interpolate(*args, **kwargs) - - else: - raise ValueError( - 'Specified method was "{}" but must be one of' - '"asfreq", "ffill", "bfill", or "interpolate"'.format(method) - ) - - def asfreq(self): - """Return values of original object at the new up-sampling frequency; - essentially a re-index with new times set to NaN. - """ - return self._upsample("asfreq") - - def pad(self, tolerance=None): - """Forward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("pad", tolerance=tolerance) - - ffill = pad - - def backfill(self, tolerance=None): - """Backward fill new values at up-sampled frequency. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. - Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("backfill", tolerance=tolerance) - - bfill = backfill - - def nearest(self, tolerance=None): - """Take new values from nearest original coordinate to up-sampled - frequency coordinates. - - Parameters - ---------- - tolerance : optional - Maximum distance between original and new labels to limit - the up-sampling method. 
- Up-sampled data with indices that satisfy the equation - ``abs(index[indexer] - target) <= tolerance`` are filled by - new values. Data with indices that are outside the given - tolerance are filled with ``NaN`` s - """ - return self._upsample("nearest", tolerance=tolerance) - - def interpolate(self, kind="linear"): - """Interpolate up-sampled data using the original data - as knots. - - Parameters - ---------- - kind : {"linear", "nearest", "zero", "slinear", \ - "quadratic", "cubic"}, default: "linear" - Interpolation scheme to use - - See Also - -------- - scipy.interpolate.interp1d - - """ - return self._interpolate(kind=kind) - - def _interpolate(self, kind="linear"): - """Apply scipy.interpolate.interp1d along resampling dimension.""" - # drop any existing non-dimension coordinates along the resampling - # dimension - dummy = self._obj.copy() - for k, v in self._obj.coords.items(): - if k != self._dim and self._dim in v.dims: - dummy = dummy.drop_vars(k) - return dummy.interp( - assume_sorted=True, - method=kind, - kwargs={"bounds_error": False}, - **{self._dim: self._full_index}, - ) - - -class DataArrayResample(DataArrayResampleBase, DataArrayResampleReductions): +class DataArrayResample(Resample, DataArrayGroupByBase, DataArrayResampleReductions): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -449,7 +274,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetResampleBase, DatasetResampleReductions): +class DatasetResample(Resample, DatasetGroupByBase, DatasetResampleReductions): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs):
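A closing note on the `_flox_reduce` helper that the final "Deduplicate" patch
keeps on `Resample`: resampling stores its groups as contiguous slices along the
resampled dimension rather than as a per-element label array, so the method
rebuilds labels by repeating each unique coordinate by its slice length before
handing off to flox. A standalone numpy sketch of that reconstruction (example
values only; no xarray or flox calls):

    import numpy as np

    # Resample groups are contiguous slices along the resampled dimension.
    group_indices = [slice(0, 3), slice(3, 5), slice(5, None)]
    unique_coord = np.array(["2021-01", "2021-02", "2021-03"])
    dim_size = 8  # length of the resampled dimension

    # Turn each slice into a run length, substituting the dimension size when
    # the slice is open-ended, then repeat each label by its run length.
    repeats = [
        (s.stop if s.stop is not None else dim_size) - s.start
        for s in group_indices
    ]
    labels = np.repeat(unique_coord, repeats)
    print(labels)
    # ['2021-01' '2021-01' '2021-01' '2021-02' '2021-02'
    #  '2021-03' '2021-03' '2021-03']

The resulting label array is what gets passed as `group=` to the shared
GroupBy `_flox_reduce`, after which the temporary `__resample_dim__` name is
renamed back to the original dimension.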