Skip to content
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ v0.16.1 (unreleased)

Breaking changes
~~~~~~~~~~~~~~~~

- :py:meth:`DataArray.astype` and :py:meth:`Dataset.astype` now preserve attributes. Keep the
old behavior by passing ``keep_attrs=False`` (:issue:`2049`, :pull:`4314`).
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_ and `Gabriel Joel Mitchell <https://github.com/gajomi>`_.

New Features
~~~~~~~~~~~~
Expand Down
42 changes: 42 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,48 @@ def isin(self, test_elements):
dask="allowed",
)

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
    """
    Return a copy of this xarray object whose data is cast to ``dtype``.
    Coordinate dtypes are left unchanged.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Which kinds of data casting are permitted. The default, 'unsafe',
        is kept for backwards compatibility.

        * 'no': the data types should not be cast at all.
        * 'equiv': only byte-order changes are allowed.
        * 'safe': only casts which can preserve values are allowed.
        * 'same_kind': only safe casts or casts within a kind,
          like float64 to float32, are allowed.
        * 'unsafe': any data conversions may be done.
    copy : bool, optional
        By default a newly allocated array is always returned. When set to
        False and the `dtype` requirement is already satisfied, the input
        array is returned instead of a copy.
    keep_attrs : bool, optional
        Attributes are kept by default. Pass False to drop them from the
        returned object.

    See Also
    --------
    np.ndarray.astype
    dask.array.Array.astype
    """
    from .computation import apply_ufunc

    # Options handed through, untouched, to the array's own ``astype``.
    cast_options = {"dtype": dtype, "casting": casting, "copy": copy}

    return apply_ufunc(
        duck_array_ops.astype,
        self,
        kwargs=cast_options,
        keep_attrs=keep_attrs,
        dask="allowed",
    )

def __enter__(self: T) -> T:
    """Enter a ``with`` block, handing back this same object."""
    return self

Expand Down
27 changes: 27 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,33 @@ def trapz(y, x, axis):
)


def astype(data, **kwargs):
    """Cast ``data`` by delegating to its own ``astype`` method.

    Parameters
    ----------
    data : array-like
        Any object exposing an ``astype`` method accepting keyword arguments.
    **kwargs
        Keyword arguments forwarded to ``data.astype``.

    Returns
    -------
    Whatever ``data.astype(**kwargs)`` returns.

    Raises
    ------
    TypeError
        Re-raised unchanged, unless it is the "unexpected keyword argument
        'casting'" error and ``data`` is a ``sparse.COO`` array.

    Warns
    -----
    RuntimeWarning
        When the ``casting`` argument is dropped for a ``sparse.COO`` array
        before retrying the cast.
    """
    try:
        return data.astype(**kwargs)
    except TypeError:
        # Only a failure of the "unexpected keyword" kind (raised when the
        # call's arguments are rejected) is eligible for the retry below;
        # every other TypeError propagates with its original traceback.
        import sys

        message = str(sys.exc_info()[1])
        if "got an unexpected keyword argument 'casting'" not in message:
            raise

        # FIXME: This should no longer be necessary in future versions of sparse
        # Current versions of sparse (v0.10.0) don't support the "casting" kwarg
        # This was fixed by https://github.com/pydata/sparse/pull/392
        try:
            import sparse
        except ImportError:
            sparse = None

        # Look the class up through the public ``sparse.COO`` name, and via
        # getattr, so that a missing attribute cannot raise from inside this
        # handler and hide the original TypeError.
        coo_type = getattr(sparse, "COO", None)
        if coo_type is None or not isinstance(data, coo_type):
            raise

        warnings.warn(
            "The current version of sparse does not support the 'casting' argument. It will be ignored in the call to astype().",
            RuntimeWarning,
            stacklevel=4,
        )
        kwargs.pop("casting", None)
    return data.astype(**kwargs)


def asarray(data, xp=np):
return (
data
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
NUMPY_SAME_METHODS = ["item", "searchsorted"]
# methods which don't modify the data shape, so the result should still be
# wrapped in a Variable/DataArray
# NOTE(review): the two consecutive assignments below look like the removed and
# added lines of a diff; as written, the second one (without "astype") is the
# binding that takes effect.
NUMPY_UNARY_METHODS = ["astype", "argsort", "clip", "conj", "conjugate"]
NUMPY_UNARY_METHODS = ["argsort", "clip", "conj", "conjugate"]
PANDAS_UNARY_FUNCTIONS = ["isnull", "notnull"]
# methods which remove an axis
REDUCE_METHODS = ["all", "any"]
Expand Down
41 changes: 41 additions & 0 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,47 @@ def data(self, data):
)
self._data = data

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
    """
    Return a copy of this Variable whose data is cast to ``dtype``.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
        Which kinds of data casting are permitted. The default, 'unsafe',
        is kept for backwards compatibility.

        * 'no': the data types should not be cast at all.
        * 'equiv': only byte-order changes are allowed.
        * 'safe': only casts which can preserve values are allowed.
        * 'same_kind': only safe casts or casts within a kind,
          like float64 to float32, are allowed.
        * 'unsafe': any data conversions may be done.
    copy : bool, optional
        By default a newly allocated array is always returned. When set to
        False and the `dtype` requirement is already satisfied, the input
        array is returned instead of a copy.
    keep_attrs : bool, optional
        Attributes are kept by default. Pass False to drop them from the
        returned object.

    See Also
    --------
    np.ndarray.astype
    dask.array.Array.astype
    """
    from .computation import apply_ufunc

    # Arguments forwarded as-is to the underlying array's ``astype``.
    astype_kwargs = {"dtype": dtype, "casting": casting, "copy": copy}
    result = apply_ufunc(
        duck_array_ops.astype,
        self,
        kwargs=astype_kwargs,
        keep_attrs=keep_attrs,
        dask="allowed",
    )
    return result

def load(self, **kwargs):
"""Manually trigger loading of this variable's data from disk or a
remote source into memory and return this variable.
Expand Down
13 changes: 13 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1874,6 +1874,19 @@ def test_array_interface(self):
bar = Variable(["x", "y"], np.zeros((10, 20)))
assert_equal(self.dv, np.maximum(self.dv, bar))

def test_astype_attrs(self):
    """``astype`` keeps attrs by default and drops them with ``keep_attrs=False``."""
    candidates = (self.va.copy(), self.mda.copy(), self.ds.copy())
    for obj in candidates:
        obj.attrs["foo"] = "bar"

        # Default call: attributes must survive the cast.
        expected_items = list(obj.attrs.items())
        assert expected_items == list(obj.astype(float).attrs.items())

        # Explicit opt-out: the result carries no attributes.
        stripped = obj.astype(float, keep_attrs=False)
        assert [] == list(stripped.attrs.items())

def test_astype_dtype(self):
    """Casting an integer DataArray to float keeps values and changes the dtype kind."""
    as_int = DataArray([-1, 1, 2, 3, 1000])
    as_float = as_int.astype(float)

    # Values are unchanged by the cast ...
    assert_array_equal(as_int, as_float)
    # ... while the dtype kind goes from integer to floating.
    assert np.issubdtype(as_int.dtype, np.integer)
    assert np.issubdtype(as_float.dtype, np.floating)

def test_is_null(self):
x = np.random.RandomState(42).randn(5, 6)
x[x < 0] = np.nan
Expand Down
11 changes: 11 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5607,6 +5607,17 @@ def test_pad(self):
np.testing.assert_equal(padded["var1"].isel(dim2=[0, -1]).data, 42)
np.testing.assert_equal(padded["dim2"][[0, -1]].data, np.nan)

def test_astype_attrs(self):
    """Dataset- and variable-level attrs follow ``keep_attrs`` in ``astype``."""
    data = create_test_data(seed=123)
    data.attrs["foo"] = "bar"

    # Default behaviour: attributes are carried over at both levels.
    dataset_items = list(data.attrs.items())
    assert dataset_items == list(data.astype(float).attrs.items())
    var1_items = list(data.var1.attrs.items())
    assert var1_items == list(data.astype(float).var1.attrs.items())

    # keep_attrs=False: attributes are dropped at both levels.
    no_items = []
    assert no_items == list(data.astype(float, keep_attrs=False).attrs.items())
    assert no_items == list(
        data.astype(float, keep_attrs=False).var1.attrs.items()
    )


# Py.test tests

Expand Down