diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 2652af5d9fd..a3c974e8fb0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,9 @@ v0.16.1 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ - +- :py:meth:`DataArray.astype` and :py:meth:`Dataset.astype` now preserve attributes. Keep the + old behavior by passing `keep_attrs=False` (:issue:`2049`, :pull:`4314`). + By `Dan Nowacki <https://github.com/dnowacki-usgs>`_ and `Gabriel Joel Mitchell <https://github.com/gajomi>`_. New Features ~~~~~~~~~~~~ diff --git a/xarray/core/common.py b/xarray/core/common.py index 4207aea3a25..798889f57b0 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1299,6 +1299,53 @@ def isin(self, test_elements): dask="allowed", ) + def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True): + """ + Copy of the xarray object, with data cast to a specified type. + Leaves coordinate dtype unchanged. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. Defaults to 'unsafe' + for backwards compatibility. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + copy : bool, optional + By default, astype always returns a newly allocated array. If this + is set to False and the `dtype` requirement is satisfied, the input + array is returned instead of a copy. + keep_attrs : bool, optional + By default, astype keeps attributes. Set to False to remove + attributes in the returned object. + + Returns + ------- + out : same as object + New object with data cast to the specified type. 
+ + See also + -------- + np.ndarray.astype + dask.array.Array.astype + """ + from .computation import apply_ufunc + + return apply_ufunc( + duck_array_ops.astype, + self, + kwargs=dict(dtype=dtype, casting=casting, copy=copy), + keep_attrs=keep_attrs, + dask="allowed", + ) + def __enter__(self: T) -> T: return self diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 377e7377b6a..e64fea2ccf0 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -7,6 +7,7 @@ import datetime import inspect import warnings +from distutils.version import LooseVersion from functools import partial import numpy as np @@ -14,7 +15,7 @@ from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import cupy_array_type, dask_array_type +from .pycompat import cupy_array_type, dask_array_type, sparse_array_type try: import dask.array as dask_array @@ -150,6 +151,28 @@ def trapz(y, x, axis): ) +def astype(data, **kwargs): + try: + import sparse + except ImportError: + sparse = None + + if ( + sparse is not None + and isinstance(data, sparse_array_type) + and LooseVersion(sparse.__version__) < LooseVersion("0.11.0") + and "casting" in kwargs + ): + warnings.warn( + "The current version of sparse does not support the 'casting' argument. 
It will be ignored in the call to astype().", + RuntimeWarning, + stacklevel=4, + ) + kwargs.pop("casting") + + return data.astype(**kwargs) + + def asarray(data, xp=np): return ( data diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 9dd9ee24ccd..28f3c302232 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -42,7 +42,7 @@ NUMPY_SAME_METHODS = ["item", "searchsorted"] # methods which don't modify the data shape, so the result should still be # wrapped in an Variable/DataArray -NUMPY_UNARY_METHODS = ["astype", "argsort", "clip", "conj", "conjugate"] +NUMPY_UNARY_METHODS = ["argsort", "clip", "conj", "conjugate"] PANDAS_UNARY_FUNCTIONS = ["isnull", "notnull"] # methods which remove an axis REDUCE_METHODS = ["all", "any"] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a9567e80ce4..f00d81869da 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -360,6 +360,52 @@ def data(self, data): ) self._data = data + def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True): + """ + Copy of the Variable object, with data cast to a specified type. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. Defaults to 'unsafe' + for backwards compatibility. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + copy : bool, optional + By default, astype always returns a newly allocated array. If this + is set to False and the `dtype` requirement is satisfied, the input + array is returned instead of a copy. + keep_attrs : bool, optional + By default, astype keeps attributes. 
Set to False to remove + attributes in the returned object. + + Returns + ------- + out : same as object + New object with data cast to the specified type. + + See also + -------- + np.ndarray.astype + dask.array.Array.astype + """ + from .computation import apply_ufunc + + return apply_ufunc( + duck_array_ops.astype, + self, + kwargs=dict(dtype=dtype, casting=casting, copy=copy), + keep_attrs=keep_attrs, + dask="allowed", + ) + def load(self, **kwargs): """Manually trigger loading of this variable's data from disk or a remote source into memory and return this variable. diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 84455d320cb..96760207ca3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1874,6 +1874,19 @@ def test_array_interface(self): bar = Variable(["x", "y"], np.zeros((10, 20))) assert_equal(self.dv, np.maximum(self.dv, bar)) + def test_astype_attrs(self): + for v in [self.va.copy(), self.mda.copy(), self.ds.copy()]: + v.attrs["foo"] = "bar" + assert v.attrs == v.astype(float).attrs + assert not v.astype(float, keep_attrs=False).attrs + + def test_astype_dtype(self): + original = DataArray([-1, 1, 2, 3, 1000]) + converted = original.astype(float) + assert_array_equal(original, converted) + assert np.issubdtype(original.dtype, np.integer) + assert np.issubdtype(converted.dtype, np.floating) + def test_is_null(self): x = np.random.RandomState(42).randn(5, 6) x[x < 0] = np.nan diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 72bc560bcc6..6af8a8f0e71 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5634,6 +5634,15 @@ def test_pad(self): np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42) np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan) + def test_astype_attrs(self): + data = create_test_data(seed=123) + data.attrs["foo"] = "bar" + + assert data.attrs == data.astype(float).attrs + assert data.var1.attrs 
== data.astype(float).var1.attrs + assert not data.astype(float, keep_attrs=False).attrs + assert not data.astype(float, keep_attrs=False).var1.attrs + # Py.test tests