Skip to content
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ v0.16.1 (unreleased)

Breaking changes
~~~~~~~~~~~~~~~~

- :py:meth:`DataArray.astype` and :py:meth:`Dataset.astype` now preserve attributes. Keep the
old behavior by passing ``keep_attrs=False`` (:issue:`2049`, :pull:`4314`).
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_ and `Gabriel Joel Mitchell <https://github.com/gajomi>`_.

New Features
~~~~~~~~~~~~
Expand Down
47 changes: 47 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,53 @@ def isin(self, test_elements):
dask="allowed",
)

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
    """
    Return a copy of this xarray object with its data cast to ``dtype``.

    The dtype of coordinates is left unchanged.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'unsafe'
        for backwards compatibility.

        * 'no' means the data types should not be cast at all.
        * 'equiv' means only byte-order changes are allowed.
        * 'safe' means only casts which can preserve values are allowed.
        * 'same_kind' means only safe casts or casts within a kind,
          like float64 to float32, are allowed.
        * 'unsafe' means any data conversions may be done.
    copy : bool, optional
        By default, astype always returns a newly allocated array. If this
        is set to False and the `dtype` requirement is satisfied, the input
        array is returned instead of a copy.
    keep_attrs : bool, optional
        By default, astype keeps attributes. Set to False to remove
        attributes in the returned object.

    Returns
    -------
    out : same as object
        New object with data cast to the specified type.

    See Also
    --------
    np.ndarray.astype
    dask.array.Array.astype
    """
    # Imported lazily inside the method, as in the original implementation.
    from .computation import apply_ufunc

    # Options forwarded untouched to the array-level ``astype`` helper.
    cast_options = {"dtype": dtype, "casting": casting, "copy": copy}

    return apply_ufunc(
        duck_array_ops.astype,
        self,
        kwargs=cast_options,
        keep_attrs=keep_attrs,
        dask="allowed",
    )

def __enter__(self: T) -> T:
    # Entering the ``with`` block hands back this very object, unchanged.
    return self

Expand Down
23 changes: 23 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import contextlib
import inspect
import warnings
from distutils.version import LooseVersion
from functools import partial

import numpy as np
Expand Down Expand Up @@ -149,6 +150,28 @@ def trapz(y, x, axis):
)


def astype(data, dtype, **kwargs):
    """Cast a duck array to ``dtype`` by delegating to ``data.astype``.

    Parameters
    ----------
    data : array-like
        Any object exposing an ``astype`` method (e.g. numpy, dask or
        sparse arrays).
    dtype : str or dtype
        Target data type. It is forwarded *positionally* so that duck arrays
        whose ``astype`` does not name its first parameter ``dtype`` still
        work. Callers may pass it either positionally or as ``dtype=...``.
    **kwargs
        Extra keyword arguments passed through to ``data.astype`` (for
        example ``casting`` or ``copy``).

    Returns
    -------
    The result of ``data.astype(dtype, **kwargs)``.

    Warns
    -----
    RuntimeWarning
        If ``data`` is a ``sparse.COO`` array, the installed sparse version
        is older than 0.11.0 and ``casting`` was supplied; the argument is
        then dropped before delegating.
    """
    try:
        import sparse
    except ImportError:
        # sparse is an optional dependency; without it there is nothing to
        # special-case.
        sparse = None

    if (
        sparse is not None
        # Use the public class rather than the private ``sparse._coo.core``
        # module path, which is an implementation detail of sparse.
        and isinstance(data, sparse.COO)
        and "casting" in kwargs
        and LooseVersion(sparse.__version__) < LooseVersion("0.11.0")
    ):
        warnings.warn(
            "The current version of sparse does not support the 'casting' argument. It will be ignored in the call to astype().",
            RuntimeWarning,
            stacklevel=4,
        )
        # ``kwargs`` is this call's own dict, so dropping the key does not
        # affect the caller.
        kwargs.pop("casting")

    return data.astype(dtype, **kwargs)


def asarray(data, xp=np):
return (
data
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
NUMPY_SAME_METHODS = ["item", "searchsorted"]
# methods which don't modify the data shape, so the result should still be
# wrapped in a Variable/DataArray
# NOTE(review): NUMPY_UNARY_METHODS is assigned twice below; only the second
# assignment (the one without "astype") takes effect. This looks like the
# before/after pair of a diff hunk -- confirm which line is meant to remain.
NUMPY_UNARY_METHODS = ["astype", "argsort", "clip", "conj", "conjugate"]
NUMPY_UNARY_METHODS = ["argsort", "clip", "conj", "conjugate"]
PANDAS_UNARY_FUNCTIONS = ["isnull", "notnull"]
# methods which remove an axis
REDUCE_METHODS = ["all", "any"]
Expand Down
46 changes: 46 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,52 @@ def data(self, data):
)
self._data = data

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
    """
    Return a copy of this Variable with its data cast to ``dtype``.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Controls what kind of data casting may occur. Defaults to 'unsafe'
        for backwards compatibility.

        * 'no' means the data types should not be cast at all.
        * 'equiv' means only byte-order changes are allowed.
        * 'safe' means only casts which can preserve values are allowed.
        * 'same_kind' means only safe casts or casts within a kind,
          like float64 to float32, are allowed.
        * 'unsafe' means any data conversions may be done.
    copy : bool, optional
        By default, astype always returns a newly allocated array. If this
        is set to False and the `dtype` requirement is satisfied, the input
        array is returned instead of a copy.
    keep_attrs : bool, optional
        By default, astype keeps attributes. Set to False to remove
        attributes in the returned object.

    Returns
    -------
    out : same as object
        New object with data cast to the specified type.

    See Also
    --------
    np.ndarray.astype
    dask.array.Array.astype
    """
    # Imported lazily inside the method, as in the original implementation.
    from .computation import apply_ufunc

    # Everything except ``keep_attrs`` is handed to the array-level cast.
    array_kwargs = dict(dtype=dtype)
    array_kwargs["casting"] = casting
    array_kwargs["copy"] = copy

    result = apply_ufunc(
        duck_array_ops.astype,
        self,
        kwargs=array_kwargs,
        keep_attrs=keep_attrs,
        dask="allowed",
    )
    return result

def load(self, **kwargs):
"""Manually trigger loading of this variable's data from disk or a
remote source into memory and return this variable.
Expand Down
13 changes: 13 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1874,6 +1874,19 @@ def test_array_interface(self):
bar = Variable(["x", "y"], np.zeros((10, 20)))
assert_equal(self.dv, np.maximum(self.dv, bar))

def test_astype_attrs(self):
    """``astype`` keeps attrs by default and drops them with ``keep_attrs=False``."""
    candidates = (self.va.copy(), self.mda.copy(), self.ds.copy())
    for obj in candidates:
        obj.attrs["foo"] = "bar"

        # Default behaviour: the attributes survive the cast.
        kept = obj.astype(float)
        assert obj.attrs == kept.attrs

        # Opt-out: the result carries no attributes at all.
        stripped = obj.astype(float, keep_attrs=False)
        assert not stripped.attrs

def test_astype_dtype(self):
    """Casting to float preserves the values and changes only the dtype."""
    source = DataArray([-1, 1, 2, 3, 1000])
    as_float = source.astype(float)

    # Values are unchanged by the cast ...
    assert_array_equal(source, as_float)
    # ... the source is still an integer array, and the result is floating.
    assert np.issubdtype(source.dtype, np.integer)
    assert np.issubdtype(as_float.dtype, np.floating)

def test_is_null(self):
x = np.random.RandomState(42).randn(5, 6)
x[x < 0] = np.nan
Expand Down
9 changes: 9 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5607,6 +5607,15 @@ def test_pad(self):
np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42)
np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan)

def test_astype_attrs(self):
    """Dataset- and variable-level attrs follow ``keep_attrs`` in ``astype``."""
    ds = create_test_data(seed=123)
    ds.attrs["foo"] = "bar"

    # Default: attributes are preserved on the dataset and on its variables.
    assert ds.attrs == ds.astype(float).attrs
    assert ds.var1.attrs == ds.astype(float).var1.attrs

    # keep_attrs=False: attributes are dropped at both levels.
    assert not ds.astype(float, keep_attrs=False).attrs
    assert not ds.astype(float, keep_attrs=False).var1.attrs


# Py.test tests

Expand Down