From 15328b64a44ddb18c1c35c3126b58498a63e3dd5 Mon Sep 17 00:00:00 2001 From: Ben Mares Date: Thu, 9 Nov 2023 22:32:35 +0100 Subject: [PATCH 1/5] Declare Dataset, DataArray, Variable, GroupBy unhashable (#8392) * Add unhashable to generate_ops * Regenerate _typed_ops after adding "unhashable" * Fix variable redefinition The previous commit revealed the following mypy error: xarray/core/dataset.py: note: In member "swap_dims" of class "Dataset": xarray/core/dataset.py:4415: error: Incompatible types in assignment (expression has type "Variable", variable has type "Hashable") [assignment] xarray/core/dataset.py:4415: note: Following member(s) of "Variable" have conflicts: xarray/core/dataset.py:4415: note: __hash__: expected "Callable[[], int]", got "None" xarray/core/dataset.py:4416: error: "Hashable" has no attribute "dims" [attr-defined] xarray/core/dataset.py:4419: error: "Hashable" has no attribute "to_index_variable" [attr-defined] xarray/core/dataset.py:4430: error: "Hashable" has no attribute "to_base_variable" [attr-defined] --- xarray/core/_typed_ops.py | 20 ++++++++++++++++++++ xarray/core/dataset.py | 32 +++++++++++++++++--------------- xarray/util/generate_ops.py | 6 ++++++ 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/xarray/core/_typed_ops.py b/xarray/core/_typed_ops.py index 9b79ed46a9c..ceab91ad991 100644 --- a/xarray/core/_typed_ops.py +++ b/xarray/core/_typed_ops.py @@ -83,6 +83,10 @@ def __eq__(self, other: DsCompatible) -> Self: # type:ignore[override] def __ne__(self, other: DsCompatible) -> Self: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: DsCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -291,6 +295,10 @@ def __eq__(self, other: DaCompatible) -> Self: # type:ignore[override] 
def __ne__(self, other: DaCompatible) -> Self: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: DaCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -643,6 +651,10 @@ def __ne__(self, other: VarCompatible) -> Self: def __ne__(self, other: VarCompatible) -> Self | T_DataArray: return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: VarCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -851,6 +863,10 @@ def __eq__(self, other: GroupByCompatible) -> Dataset: # type:ignore[override] def __ne__(self, other: GroupByCompatible) -> Dataset: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: GroupByCompatible) -> Dataset: return self._binary_op(other, operator.add, reflexive=True) @@ -973,6 +989,10 @@ def __eq__(self, other: T_Xarray) -> T_Xarray: # type:ignore[override] def __ne__(self, other: T_Xarray) -> T_Xarray: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: T_Xarray) -> T_Xarray: return self._binary_op(other, operator.add, reflexive=True) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 10deea5f62b..610575e9f64 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py 
@@ -4410,16 +4410,18 @@ def swap_dims( # rename_dims() method that only renames dimensions. dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") - for k, v in dims_dict.items(): - if k not in self.dims: + for current_name, new_name in dims_dict.items(): + if current_name not in self.dims: raise ValueError( - f"cannot swap from dimension {k!r} because it is " + f"cannot swap from dimension {current_name!r} because it is " f"not one of the dimensions of this dataset {tuple(self.dims)}" ) - if v in self.variables and self.variables[v].dims != (k,): + if new_name in self.variables and self.variables[new_name].dims != ( + current_name, + ): raise ValueError( - f"replacement dimension {v!r} is not a 1D " - f"variable along the old dimension {k!r}" + f"replacement dimension {new_name!r} is not a 1D " + f"variable along the old dimension {current_name!r}" ) result_dims = {dims_dict.get(dim, dim) for dim in self.dims} @@ -4429,24 +4431,24 @@ def swap_dims( variables: dict[Hashable, Variable] = {} indexes: dict[Hashable, Index] = {} - for k, v in self.variables.items(): - dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) + for current_name, current_variable in self.variables.items(): + dims = tuple(dims_dict.get(dim, dim) for dim in current_variable.dims) var: Variable - if k in result_dims: - var = v.to_index_variable() + if current_name in result_dims: + var = current_variable.to_index_variable() var.dims = dims - if k in self._indexes: - indexes[k] = self._indexes[k] - variables[k] = var + if current_name in self._indexes: + indexes[current_name] = self._indexes[current_name] + variables[current_name] = var else: index, index_vars = create_default_index_implicit(var) indexes.update({name: index for name in index_vars}) variables.update(index_vars) coord_names.update(index_vars) else: - var = v.to_base_variable() + var = current_variable.to_base_variable() var.dims = dims - variables[k] = var + variables[current_name] = var return 
self._replace_with_new_dims(variables, coord_names, indexes=indexes) diff --git a/xarray/util/generate_ops.py b/xarray/util/generate_ops.py index 5859934f646..f9aa69d983b 100644 --- a/xarray/util/generate_ops.py +++ b/xarray/util/generate_ops.py @@ -116,6 +116,10 @@ def {method}(self) -> Self: template_other_unary = """ def {method}(self, *args: Any, **kwargs: Any) -> Self: return self._unary_op({func}, *args, **kwargs)""" +unhashable = """ + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment]""" # For some methods we override return type `bool` defined by base class `object`. # We need to add "# type: ignore[override]" @@ -152,6 +156,7 @@ def binops( template_binop, extras | {"type_ignore": _type_ignore(type_ignore_eq)}, ), + ([(None, None)], unhashable, extras), (BINOPS_REFLEXIVE, template_reflexive, extras), ] @@ -185,6 +190,7 @@ def binops_overload( "overload_type_ignore": _type_ignore(type_ignore_eq), }, ), + ([(None, None)], unhashable, extras), (BINOPS_REFLEXIVE, template_reflexive, extras), ] From 0ba2eb074adf100edb55affa6d595e1a14450071 Mon Sep 17 00:00:00 2001 From: Ben Mares Date: Fri, 10 Nov 2023 01:39:16 +0100 Subject: [PATCH 2/5] Add missing DataArray.dt.total_seconds() method (#8435) --- doc/api.rst | 1 + doc/whats-new.rst | 2 ++ xarray/core/accessor_dt.py | 14 ++++++++++++++ xarray/tests/test_accessor_dt.py | 14 ++++++++++++++ 4 files changed, 31 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 96b4864804f..f2ff809e45f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -557,6 +557,7 @@ Datetimelike properties DataArray.dt.seconds DataArray.dt.microseconds DataArray.dt.nanoseconds + DataArray.dt.total_seconds **Timedelta methods**: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b6bad62dd7c..e28177814b7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,6 +24,8 @@ New Features - Use `opt_einsum `_ for 
:py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). +- Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`). + By `Ben Mares `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 0d4a402cd19..b57c2f3857c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -74,6 +74,8 @@ def _access_through_series(values, name): if name == "season": months = values_as_series.dt.month.values field_values = _season_from_months(months) + elif name == "total_seconds": + field_values = values_as_series.dt.total_seconds().values elif name == "isocalendar": # special NaT-handling can be removed when # https://github.com/pandas-dev/pandas/issues/54657 is resolved @@ -574,6 +576,13 @@ class TimedeltaAccessor(TimeAccessor[T_DataArray]): 43200, 64800]) Coordinates: * time (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00 + >>> ts.dt.total_seconds() + + array([ 86400., 108000., 129600., 151200., 172800., 194400., 216000., + 237600., 259200., 280800., 302400., 324000., 345600., 367200., + 388800., 410400., 432000., 453600., 475200., 496800.]) + Coordinates: + * time (time) timedelta64[ns] 1 days 00:00:00 ... 
5 days 18:00:00 """ @property @@ -596,6 +605,11 @@ def nanoseconds(self) -> T_DataArray: """Number of nanoseconds (>= 0 and less than 1 microsecond) for each element""" return self._date_field("nanoseconds", np.int64) + # Not defined as a property in order to match the Pandas API + def total_seconds(self) -> T_DataArray: + """Total duration of each element expressed in seconds.""" + return self._date_field("total_seconds", np.float64) + class CombinedDatetimelikeAccessor( DatetimeAccessor[T_DataArray], TimedeltaAccessor[T_DataArray] diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 64b487628c8..a8d5e722b66 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -6,6 +6,7 @@ import xarray as xr from xarray.tests import ( + assert_allclose, assert_array_equal, assert_chunks_equal, assert_equal, @@ -100,6 +101,19 @@ def test_field_access(self, field) -> None: assert expected.dtype == actual.dtype assert_identical(expected, actual) + def test_total_seconds(self) -> None: + # Subtract a value in the middle of the range to ensure that some values + # are negative + delta = self.data.time - np.datetime64("2000-01-03") + actual = delta.dt.total_seconds() + expected = xr.DataArray( + np.arange(-48, 52, dtype=np.float64) * 3600, + name="total_seconds", + coords=[self.data.time], + ) + # This works with assert_identical when pandas is >=1.5.0. 
+ assert_allclose(expected, actual) + @pytest.mark.parametrize( "field, pandas_field", [ From e5d163a8ddef4e8aa95a2841505a642f8ffbcea6 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Thu, 9 Nov 2023 22:15:01 -0800 Subject: [PATCH 3/5] Rename `to_array` to `to_dataarray` (#8438) --- doc/api.rst | 2 +- doc/howdoi.rst | 2 +- doc/user-guide/reshaping.rst | 12 ++++++------ doc/whats-new.rst | 12 +++++++++--- xarray/core/common.py | 2 +- xarray/core/computation.py | 4 +++- xarray/core/dataset.py | 14 ++++++++++---- xarray/core/groupby.py | 4 ++++ xarray/tests/test_concat.py | 8 ++++---- xarray/tests/test_dask.py | 16 ++++++++-------- xarray/tests/test_dataarray.py | 4 ++-- xarray/tests/test_dataset.py | 6 +++--- xarray/tests/test_groupby.py | 6 +++--- xarray/tests/test_rolling.py | 2 +- xarray/tests/test_sparse.py | 4 ++-- 15 files changed, 58 insertions(+), 40 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index f2ff809e45f..51d79161578 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -603,7 +603,7 @@ Dataset methods Dataset.as_numpy Dataset.from_dataframe Dataset.from_dict - Dataset.to_array + Dataset.to_dataarray Dataset.to_dataframe Dataset.to_dask_dataframe Dataset.to_dict diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 8cc4e9939f2..97b0872fdc4 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -36,7 +36,7 @@ How do I ... 
* - rename a variable, dimension or coordinate - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`, * - convert a DataArray to Dataset or vice versa - - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_array`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` + - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_dataarray`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` * - extract variables that have certain attributes - :py:meth:`Dataset.filter_by_attrs` * - extract the underlying array (e.g. NumPy or Dask arrays) diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst index d0b72322218..14b343549e2 100644 --- a/doc/user-guide/reshaping.rst +++ b/doc/user-guide/reshaping.rst @@ -59,11 +59,11 @@ use :py:meth:`~xarray.DataArray.squeeze` Converting between datasets and arrays -------------------------------------- -To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_array`: +To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_dataarray`: .. ipython:: python - arr = ds.to_array() + arr = ds.to_dataarray() arr This method broadcasts all data variables in the dataset against each other, @@ -77,7 +77,7 @@ To convert back from a DataArray to a Dataset, use arr.to_dataset(dim="variable") -The broadcasting behavior of ``to_array`` means that the resulting array +The broadcasting behavior of ``to_dataarray`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python @@ -88,7 +88,7 @@ includes the union of data variable dimensions: ds2 # the resulting array has 6 elements - ds2.to_array() + ds2.to_dataarray() Otherwise, the result could not be represented as an orthogonal array. @@ -161,8 +161,8 @@ arrays as inputs. 
For datasets with only one variable, we only need ``stack`` and ``unstack``, but combining multiple variables in a :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset have matching numbers of dimensions, we can call -:py:meth:`~xarray.Dataset.to_array` and then stack along the the new coordinate. -But :py:meth:`~xarray.Dataset.to_array` will broadcast the dataarrays together, +:py:meth:`~xarray.Dataset.to_dataarray` and then stack along the the new coordinate. +But :py:meth:`~xarray.Dataset.to_dataarray` will broadcast the dataarrays together, which will effectively tile the lower dimensional variable along the missing dimensions. The method :py:meth:`xarray.Dataset.to_stacked_array` allows combining variables of differing dimensions without this wasteful copying while diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e28177814b7..157282803cc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,12 @@ Deprecations this was one place in the API where dimension positions were used. (:pull:`8341`) By `Maximilian Roos `_. +- Rename :py:meth:`Dataset.to_array` to :py:meth:`Dataset.to_dataarray` for + consistency with :py:meth:`DataArray.to_dataset` & + :py:func:`open_dataarray` functions. This is a "soft" deprecation — the + existing methods work and don't raise any warnings, given the relatively small + benefits of the change. + By `Maximilian Roos `_. 
Bug fixes ~~~~~~~~~ @@ -6709,7 +6715,7 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- New ``xray.Dataset.to_array`` and enhanced +- New ``xray.Dataset.to_dataarray`` and enhanced ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: @@ -6720,8 +6726,8 @@ Enhancements coords={"c": 42}, attrs={"Conventions": "None"}, ) - ds.to_array() - ds.to_array().to_dataset(dim="variable") + ds.to_dataarray() + ds.to_dataarray().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: diff --git a/xarray/core/common.py b/xarray/core/common.py index ab8a4d84261..fef8adb101a 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1173,7 +1173,7 @@ def _dataset_indexer(dim: Hashable) -> DataArray: var for var in cond if dim not in cond[var].dims ) keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim)) - return keepany.to_array().any("variable") + return keepany.to_dataarray().any("variable") _get_indexer = ( _dataarray_indexer if isinstance(cond, DataArray) else _dataset_indexer diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 23d54a5779c..0c5c9d6d5cb 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1603,7 +1603,9 @@ def cross( >>> ds_a = xr.Dataset(dict(x=("dim_0", [1]), y=("dim_0", [2]), z=("dim_0", [3]))) >>> ds_b = xr.Dataset(dict(x=("dim_0", [4]), y=("dim_0", [5]), z=("dim_0", [6]))) >>> c = xr.cross( - ... ds_a.to_array("cartesian"), ds_b.to_array("cartesian"), dim="cartesian" + ... ds_a.to_dataarray("cartesian"), + ... ds_b.to_dataarray("cartesian"), + ... dim="cartesian", ... 
) >>> c.to_dataset(dim="cartesian") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 610575e9f64..da13d52b9c1 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1502,7 +1502,7 @@ def __array__(self, dtype=None): "cannot directly convert an xarray.Dataset into a " "numpy array. Instead, create an xarray.DataArray " "first, either with indexing on the Dataset or by " - "invoking the `to_array()` method." + "invoking the `to_dataarray()` method." ) @property @@ -5260,7 +5260,7 @@ def to_stacked_array( """Combine variables of differing dimensionality into a DataArray without broadcasting. - This method is similar to Dataset.to_array but does not broadcast the + This method is similar to Dataset.to_dataarray but does not broadcast the variables. Parameters @@ -5289,7 +5289,7 @@ def to_stacked_array( See Also -------- - Dataset.to_array + Dataset.to_dataarray Dataset.stack DataArray.to_unstacked_dataset @@ -7019,7 +7019,7 @@ def assign( return data - def to_array( + def to_dataarray( self, dim: Hashable = "variable", name: Hashable | None = None ) -> DataArray: """Convert this dataset into an xarray.DataArray @@ -7056,6 +7056,12 @@ def to_array( return DataArray._construct_direct(variable, coords, name, indexes) + def to_array( + self, dim: Hashable = "variable", name: Hashable | None = None + ) -> DataArray: + """Deprecated version of to_dataarray""" + return self.to_dataarray(dim=dim, name=name) + def _normalize_dim_order( self, dim_order: Sequence[Hashable] | None = None ) -> dict[Hashable, int]: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 788e1efa80b..8c81d3e6a96 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -251,6 +251,10 @@ def to_dataarray(self) -> DataArray: data=self.data, dims=(self.name,), coords=self.coords, name=self.name ) + def to_array(self) -> DataArray: + """Deprecated version of to_dataarray.""" + return self.to_dataarray() + T_Group = Union["T_DataArray", "IndexVariable", 
_DummyGroup] diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 11d0d38594d..92415631748 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1070,10 +1070,10 @@ def test_concat_fill_value(self, fill_value) -> None: def test_concat_join_kwarg(self) -> None: ds1 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]} - ).to_array() + ).to_dataarray() ds2 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [1], "y": [0.0001]} - ).to_array() + ).to_dataarray() expected: dict[JoinOptions, Any] = {} expected["outer"] = Dataset( @@ -1101,7 +1101,7 @@ def test_concat_join_kwarg(self) -> None: for join in expected: actual = concat([ds1, ds2], join=join, dim="x") - assert_equal(actual, expected[join].to_array()) + assert_equal(actual, expected[join].to_dataarray()) def test_concat_combine_attrs_kwarg(self) -> None: da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) @@ -1224,7 +1224,7 @@ def test_concat_preserve_coordinate_order() -> None: def test_concat_typing_check() -> None: ds = Dataset({"foo": 1}, {"bar": 2}) - da = Dataset({"foo": 3}, {"bar": 4}).to_array(dim="foo") + da = Dataset({"foo": 3}, {"bar": 4}).to_dataarray(dim="foo") # concatenate a list of non-homogeneous types must raise TypeError with pytest.raises( diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 1c2511427ac..c2a77c97d85 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -608,11 +608,11 @@ def test_to_dataset_roundtrip(self): v = self.lazy_array expected = u.assign_coords(x=u["x"]) - self.assertLazyAndEqual(expected, v.to_dataset("x").to_array("x")) + self.assertLazyAndEqual(expected, v.to_dataset("x").to_dataarray("x")) def test_merge(self): def duplicate_and_merge(array): - return xr.merge([array, array.rename("bar")]).to_array() + return xr.merge([array, array.rename("bar")]).to_dataarray() expected = duplicate_and_merge(self.eager_array) actual = duplicate_and_merge(self.lazy_array) 
@@ -1306,12 +1306,12 @@ def test_map_blocks_kwargs(obj): assert_identical(actual, expected) -def test_map_blocks_to_array(map_ds): +def test_map_blocks_to_dataarray(map_ds): with raise_if_dask_computes(): - actual = xr.map_blocks(lambda x: x.to_array(), map_ds) + actual = xr.map_blocks(lambda x: x.to_dataarray(), map_ds) - # to_array does not preserve name, so cannot use assert_identical - assert_equal(actual, map_ds.to_array()) + # to_dataarray does not preserve name, so cannot use assert_identical + assert_equal(actual, map_ds.to_dataarray()) @pytest.mark.parametrize( @@ -1376,8 +1376,8 @@ def test_map_blocks_template_convert_object(): assert_identical(actual, template) ds = da.to_dataset() - func = lambda x: x.to_array().isel(x=[1]) - template = ds.to_array().isel(x=[1, 5, 9]) + func = lambda x: x.to_dataarray().isel(x=[1]) + template = ds.to_dataarray().isel(x=[1, 5, 9]) with raise_if_dask_computes(): actual = xr.map_blocks(func, ds, template=template) assert_identical(actual, template) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 26537766f4d..1fbb834b679 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3801,7 +3801,7 @@ def test_to_dataset_split(self) -> None: with pytest.raises(TypeError): array.to_dataset("x", name="foo") - roundtripped = actual.to_array(dim="x") + roundtripped = actual.to_dataarray(dim="x") assert_identical(array, roundtripped) array = DataArray([1, 2, 3], dims="x") @@ -3818,7 +3818,7 @@ def test_to_dataset_retains_keys(self) -> None: array = DataArray([1, 2, 3], coords=[("x", dates)], attrs={"a": 1}) # convert to dateset and back again - result = array.to_dataset("x").to_array(dim="x") + result = array.to_dataset("x").to_dataarray(dim="x") assert_equal(array, result) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 687aae8f1dc..af4ede15fa4 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4569,7 +4569,7 @@ 
def test_squeeze_drop(self) -> None: selected = data.squeeze(drop=True) assert_identical(data, selected) - def test_to_array(self) -> None: + def test_to_dataarray(self) -> None: ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, coords={"c": 42}, @@ -4579,10 +4579,10 @@ def test_to_array(self) -> None: coords = {"c": 42, "variable": ["a", "b"]} dims = ("variable", "x") expected = DataArray(data, coords, dims, attrs=ds.attrs) - actual = ds.to_array() + actual = ds.to_dataarray() assert_identical(expected, actual) - actual = ds.to_array("abc", name="foo") + actual = ds.to_dataarray("abc", name="foo") expected = expected.rename({"variable": "abc"}).rename("foo") assert_identical(expected, actual) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 320ba999318..8afdf95a082 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -600,19 +600,19 @@ def test_groupby_grouping_errors() -> None: with pytest.raises( ValueError, match=r"None of the data falls within bins with edges" ): - dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + dataset.to_dataarray().groupby_bins("x", bins=[0.1, 0.2, 0.3]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): dataset.groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): - dataset.to_array().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + dataset.to_dataarray().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"Failed to group data."): dataset.groupby(dataset.foo * np.nan) with pytest.raises(ValueError, match=r"Failed to group data."): - dataset.to_array().groupby(dataset.foo * np.nan) + dataset.to_dataarray().groupby(dataset.foo * np.nan) def test_groupby_reduce_dimension_error(array) -> None: diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 3b213db0b88..cb7b723a208 100644 --- a/xarray/tests/test_rolling.py +++ 
b/xarray/tests/test_rolling.py @@ -631,7 +631,7 @@ def test_rolling_construct(self, center: bool, window: int) -> None: ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" ) - assert (ds_rolling_mean.isnull().sum() == 0).to_array(dim="vars").all() + assert (ds_rolling_mean.isnull().sum() == 0).to_dataarray(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 @pytest.mark.parametrize("center", (True, False)) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 489836b70fd..5b75c10631a 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -578,7 +578,7 @@ def setUp(self): def test_to_dataset_roundtrip(self): x = self.sp_xr - assert_equal(x, x.to_dataset("x").to_array("x")) + assert_equal(x, x.to_dataset("x").to_dataarray("x")) def test_align(self): a1 = xr.DataArray( @@ -830,7 +830,7 @@ def test_reindex(self): @pytest.mark.xfail def test_merge(self): x = self.sp_xr - y = xr.merge([x, x.rename("bar")]).to_array() + y = xr.merge([x, x.rename("bar")]).to_dataarray() assert isinstance(y, sparse.SparseArray) @pytest.mark.xfail From 8e95b60939c4abc3ac785d730ed55e1a39da3c6a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 12 Nov 2023 15:53:34 -0700 Subject: [PATCH 4/5] Remove keep_attrs from resample signature (#8444) --- doc/whats-new.rst | 3 +++ xarray/core/common.py | 8 -------- xarray/core/dataarray.py | 2 -- xarray/core/dataset.py | 2 -- xarray/tests/test_groupby.py | 10 ---------- 5 files changed, 3 insertions(+), 22 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 157282803cc..430da3e4c4e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ Deprecations existing methods work and don't raise any warnings, given the relatively small benefits of the change. By `Maximilian Roos `_. +- Finally remove ``keep_attrs`` kwarg from :py:meth:`DataArray.resample` and + :py:meth:`Dataset.resample`. 
These were deprecated a long time ago. + By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index fef8adb101a..fa0fa9aec0f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -860,7 +860,6 @@ def _resample( base: int | None, offset: pd.Timedelta | datetime.timedelta | str | None, origin: str | DatetimeLike, - keep_attrs: bool | None, loffset: datetime.timedelta | str | None, restore_coord_dims: bool | None, **indexer_kwargs: str, @@ -989,13 +988,6 @@ def _resample( from xarray.core.pdcompat import _convert_base_to_offset from xarray.core.resample import RESAMPLE_DIM - if keep_attrs is not None: - warnings.warn( - "Passing ``keep_attrs`` to ``resample`` has no effect and will raise an" - " error in xarray 0.20. Pass ``keep_attrs`` directly to the applied" - " function, e.g. ``resample(...).mean(keep_attrs=True)``." - ) - # note: the second argument (now 'skipna') use to be 'dim' if ( (skipna is not None and not isinstance(skipna, bool)) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index c512e742fb8..27eb3cdfddc 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -7021,7 +7021,6 @@ def resample( base: int | None = None, offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", - keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, **indexer_kwargs: str, @@ -7143,7 +7142,6 @@ def resample( base=base, offset=offset, origin=origin, - keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, **indexer_kwargs, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index da13d52b9c1..c7f92b87d63 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -10327,7 +10327,6 @@ def resample( base: int | None = None, offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", - 
keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, **indexer_kwargs: str, @@ -10404,7 +10403,6 @@ def resample( base=base, offset=offset, origin=origin, - keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, **indexer_kwargs, diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 8afdf95a082..4974394d59a 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1776,11 +1776,6 @@ def test_resample_keep_attrs(self): expected = DataArray([1, 1, 1], [("time", times[::4])], attrs=array.attrs) assert_identical(result, expected) - with pytest.warns( - UserWarning, match="Passing ``keep_attrs`` to ``resample`` has no effect." - ): - array.resample(time="1D", keep_attrs=True) - def test_resample_skipna(self): times = pd.date_range("2000-01-01", freq="6H", periods=10) array = DataArray(np.ones(10), [("time", times)]) @@ -2138,11 +2133,6 @@ def test_resample_by_mean_with_keep_attrs(self): expected = ds.attrs assert expected == actual - with pytest.warns( - UserWarning, match="Passing ``keep_attrs`` to ``resample`` has no effect." 
- ): - ds.resample(time="1D", keep_attrs=True) - def test_resample_loffset(self): times = pd.date_range("2000-01-01", freq="6H", periods=10) ds = Dataset( From 0c1ad54d67015437b507a33305a30fa2cb1f5398 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 13 Nov 2023 12:39:52 -0700 Subject: [PATCH 5/5] Pin pint to >=0.22 (#8445) * unpin pint * More unpins * Try>=0.21 * Bump pint to >=0.22 * Undo windows test skips * Remove nanprod skips * Bump min-deps * use exact pin --------- Co-authored-by: Justus Magin --- .binder/environment.yml | 4 +- ci/requirements/all-but-dask.yml | 2 +- ci/requirements/environment-py311.yml | 2 +- ci/requirements/environment-windows-py311.yml | 2 +- ci/requirements/environment-windows.yml | 2 +- ci/requirements/environment.yml | 2 +- ci/requirements/min-all-deps.yml | 2 +- doc/whats-new.rst | 1 + xarray/tests/test_units.py | 43 ------------------- 9 files changed, 9 insertions(+), 51 deletions(-) diff --git a/.binder/environment.yml b/.binder/environment.yml index 99a7d9f2494..fa4e14c41c2 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -2,7 +2,7 @@ name: xarray-examples channels: - conda-forge dependencies: - - python=3.9 + - python=3.10 - boto3 - bottleneck - cartopy @@ -25,7 +25,7 @@ dependencies: - numpy - packaging - pandas - - pint + - pint>=0.22 - pip - pooch - pydap diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 4645be08b83..0de81ea768c 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -26,7 +26,7 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pseudonetcdf - pydap diff --git a/ci/requirements/environment-py311.yml b/ci/requirements/environment-py311.yml index 0b9817daef3..8d1107142d9 100644 --- a/ci/requirements/environment-py311.yml +++ b/ci/requirements/environment-py311.yml @@ -28,7 +28,7 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pooch - pre-commit diff 
--git a/ci/requirements/environment-windows-py311.yml b/ci/requirements/environment-windows-py311.yml index 8c36c5a9fd4..f590acb77a4 100644 --- a/ci/requirements/environment-windows-py311.yml +++ b/ci/requirements/environment-windows-py311.yml @@ -25,7 +25,7 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pre-commit - pseudonetcdf diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index efa9ccb5a9a..b4c760b25c4 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -25,7 +25,7 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pre-commit - pseudonetcdf diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 6e93ab7a946..7009932f7eb 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -29,7 +29,7 @@ dependencies: - opt_einsum - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pooch - pre-commit diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 8400270ce1b..854289ffab4 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -35,7 +35,7 @@ dependencies: - numpy=1.22 - packaging=21.3 - pandas=1.4 - - pint=0.19 + - pint=0.22 - pip - pseudonetcdf=3.2 - pydap=3.3 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 430da3e4c4e..382953cde10 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -30,6 +30,7 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Bump minimum tested pint version to ``>=0.22``. By `Deepak Cherian <https://github.com/dcherian>`_. 
Deprecations ~~~~~~~~~~~~ diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 14a7a10f734..be13e75be4c 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2,12 +2,10 @@ import functools import operator -import sys import numpy as np import pandas as pd import pytest -from packaging import version import xarray as xr from xarray.core import dtypes, duck_array_ops @@ -1513,10 +1511,6 @@ def test_dot_dataarray(dtype): class TestVariable: - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -1539,13 +1533,6 @@ class TestVariable: ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - array = np.linspace(0, 1, 10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) @@ -2348,10 +2335,6 @@ def test_repr(self, func, variant, dtype): # warnings or errors, but does not check the result func(data_array) - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -2404,13 +2387,6 @@ def test_repr(self, func, variant, dtype): ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - array = np.arange(10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) @@ -2429,10 +2405,6 @@ def test_aggregation(self, func, dtype): assert_units_equal(expected, actual) assert_allclose(expected, actual) - @pytest.mark.skipif( - 
(sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -4085,10 +4057,6 @@ def test_repr(self, func, variant, dtype): # warnings or errors, but does not check the result func(ds) - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -4110,13 +4078,6 @@ def test_repr(self, func, variant, dtype): ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - unit_a, unit_b = ( (unit_registry.Pa, unit_registry.degK) if func.name != "cumprod" @@ -5647,10 +5608,6 @@ def test_merge(self, variant, unit, error, dtype): @requires_dask class TestPintWrappingDask: - @pytest.mark.skipif( - version.parse(pint.__version__) <= version.parse("0.21"), - reason="pint didn't support dask properly before 0.21", - ) def test_duck_array_ops(self): import dask.array