Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@
"matplotlib": ("https://matplotlib.org", None),
"dask": ("https://docs.dask.org/en/latest", None),
"cftime": ("https://unidata.github.io/cftime", None),
"sparse": ("https://sparse.pydata.org/en/latest/", None),
}


Expand Down
8 changes: 6 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ Bug fixes
~~~~~~~~~

- :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`).
- :py:meth:`DataArray.astype`, :py:meth:`Dataset.astype` and :py:meth:`Variable.astype` support
the ``order`` and ``subok`` parameters again. This fixes a regression introduced in version 0.16.1
(:issue:`4644`, :pull:`4683`).
By `Richard Kleijn <https://github.com/rhkleijn>`_.
- Remove dictionary unpacking when using ``.loc`` to avoid collision with ``.sel`` parameters (:pull:`4695`).
By `Anderson Banihirwe <https://github.com/andersy005>`_.

Expand Down Expand Up @@ -170,8 +174,8 @@ Internal Changes
- Replace the internal use of ``pd.Index.__or__`` and ``pd.Index.__and__`` with ``pd.Index.union``
and ``pd.Index.intersection`` as they will stop working as set operations in the future
(:issue:`4565`). By `Mathias Hauser <https://github.com/mathause>`_.
- Add GitHub action for running nightly tests against upstream dependencies (:pull:`4583`).
By `Anderson Banihirwe <https://github.com/andersy005>`_.
- Add GitHub action for running nightly tests against upstream dependencies (:pull:`4583`).
By `Anderson Banihirwe <https://github.com/andersy005>`_.
- Ensure all figures are closed properly in plot tests (:pull:`4600`).
By `Yash Saboo <https://github.com/yashsaboo>`_, `Nirupam K N
<https://github.com/Nirupamkn>`_ and `Mathias Hauser
Expand Down
42 changes: 36 additions & 6 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,16 @@ def isin(self, test_elements):
dask="allowed",
)

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
def astype(
self: T,
dtype,
*,
order=None,
casting=None,
subok=None,
copy=None,
keep_attrs=True,
) -> T:
"""
Copy of the xarray object, with data cast to a specified type.
Leaves coordinate dtype unchanged.
Expand All @@ -1400,16 +1409,24 @@ def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
----------
dtype : str or dtype
Typecode or data-type to which the array is cast.
order : {'C', 'F', 'A', 'K'}, optional
Controls the memory layout order of the result. ‘C’ means C order,
‘F’ means Fortran order, ‘A’ means ‘F’ order if all the arrays are
Fortran contiguous, ‘C’ order otherwise, and ‘K’ means as close to
the order the array elements appear in memory as possible.
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur. Defaults to 'unsafe'
for backwards compatibility.
Controls what kind of data casting may occur.

* 'no' means the data types should not be cast at all.
* 'equiv' means only byte-order changes are allowed.
* 'safe' means only casts which can preserve values are allowed.
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.

subok : bool, optional
If True, then sub-classes will be passed-through, otherwise the
returned array will be forced to be a base-class array.
copy : bool, optional
By default, astype always returns a newly allocated array. If this
is set to False and the `dtype` requirement is satisfied, the input
Expand All @@ -1423,17 +1440,30 @@ def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
out : same as object
New object with data cast to the specified type.

Notes
-----
The ``order``, ``casting``, ``subok`` and ``copy`` arguments are only passed
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pydata/xarray are we ok with this as a general policy for functions that are light wrappers of the underlying array-type's method?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm true we don't really do this in other places AFAIK. However, it is similar to passing **kwargs on.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a good solution to me!

through to the ``astype`` method of the underlying array when a value
different than ``None`` is supplied.
Make sure to only supply these arguments if the underlying array class
supports them.

See also
--------
np.ndarray.astype
numpy.ndarray.astype
dask.array.Array.astype
sparse.COO.astype
"""
from .computation import apply_ufunc

kwargs = dict(order=order, casting=casting, subok=subok, copy=copy)
kwargs = {k: v for k, v in kwargs.items() if v is not None}

return apply_ufunc(
duck_array_ops.astype,
self,
kwargs=dict(dtype=dtype, casting=casting, copy=copy),
dtype,
kwargs=kwargs,
keep_attrs=keep_attrs,
dask="allowed",
)
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def trapz(y, x, axis):
)


def astype(data, **kwargs):
def astype(data, dtype, **kwargs):
try:
import sparse
except ImportError:
Expand All @@ -177,7 +177,7 @@ def astype(data, **kwargs):
)
kwargs.pop("casting")

return data.astype(**kwargs)
return data.astype(dtype, **kwargs)


def asarray(data, xp=np):
Expand Down
62 changes: 46 additions & 16 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,28 +370,45 @@ def data(self, data):
)
self._data = data

def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
def astype(
self: VariableType,
dtype,
*,
order=None,
casting=None,
subok=None,
copy=None,
keep_attrs=True,
) -> VariableType:
"""
Copy of the Variable object, with data cast to a specified type.

Parameters
----------
dtype : str or dtype
Typecode or data-type to which the array is cast.
Typecode or data-type to which the array is cast.
order : {'C', 'F', 'A', 'K'}, optional
Controls the memory layout order of the result. ‘C’ means C order,
‘F’ means Fortran order, ‘A’ means ‘F’ order if all the arrays are
Fortran contiguous, ‘C’ order otherwise, and ‘K’ means as close to
the order the array elements appear in memory as possible.
casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
Controls what kind of data casting may occur. Defaults to 'unsafe'
for backwards compatibility.

* 'no' means the data types should not be cast at all.
* 'equiv' means only byte-order changes are allowed.
* 'safe' means only casts which can preserve values are allowed.
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.
Controls what kind of data casting may occur.

* 'no' means the data types should not be cast at all.
* 'equiv' means only byte-order changes are allowed.
* 'safe' means only casts which can preserve values are allowed.
* 'same_kind' means only safe casts or casts within a kind,
like float64 to float32, are allowed.
* 'unsafe' means any data conversions may be done.

subok : bool, optional
If True, then sub-classes will be passed-through, otherwise the
returned array will be forced to be a base-class array.
copy : bool, optional
By default, astype always returns a newly allocated array. If this
is set to False and the `dtype` requirement is satisfied, the input
array is returned instead of a copy.
By default, astype always returns a newly allocated array. If this
is set to False and the `dtype` requirement is satisfied, the input
array is returned instead of a copy.
keep_attrs : bool, optional
By default, astype keeps attributes. Set to False to remove
attributes in the returned object.
Expand All @@ -401,17 +418,30 @@ def astype(self, dtype, casting="unsafe", copy=True, keep_attrs=True):
out : same as object
New object with data cast to the specified type.

Notes
-----
The ``order``, ``casting``, ``subok`` and ``copy`` arguments are only passed
through to the ``astype`` method of the underlying array when a value
different than ``None`` is supplied.
Make sure to only supply these arguments if the underlying array class
supports them.

See also
--------
np.ndarray.astype
numpy.ndarray.astype
dask.array.Array.astype
sparse.COO.astype
"""
from .computation import apply_ufunc

kwargs = dict(order=order, casting=casting, subok=subok, copy=copy)
kwargs = {k: v for k, v in kwargs.items() if v is not None}

return apply_ufunc(
duck_array_ops.astype,
self,
kwargs=dict(dtype=dtype, casting=casting, copy=copy),
dtype,
kwargs=kwargs,
keep_attrs=keep_attrs,
dask="allowed",
)
Expand Down
20 changes: 20 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1918,6 +1918,26 @@ def test_astype_dtype(self):
assert np.issubdtype(original.dtype, np.integer)
assert np.issubdtype(converted.dtype, np.floating)

def test_astype_order(self):
    """Check that ``astype`` forwards ``order`` to the underlying array.

    A small 2-D array is cast to ``"d"`` with ``order="F"``; the values
    must compare equal to the source, the source must report C-contiguous
    memory and the converted array must report Fortran-contiguous memory.
    """
    source = DataArray([[1, 2], [3, 4]])
    as_fortran = source.astype("d", order="F")

    # The cast must not alter the values.
    assert_equal(source, as_fortran)

    # Layout flags are read from the materialised numpy arrays.
    source_flags = source.values.flags
    assert source_flags["C_CONTIGUOUS"]
    fortran_flags = as_fortran.values.flags
    assert fortran_flags["F_CONTIGUOUS"]

def test_astype_subok(self):
    """Check that ``astype`` forwards ``subok`` to the underlying array.

    With ``subok=False`` the converted data must be a plain ``np.ndarray``;
    with ``subok=True`` the ndarray subclass must be preserved. The test is
    marked as an expected failure when the ``DataArray`` does not keep the
    subclass instance as its ``data``.
    """

    class NdArraySubclass(np.ndarray):
        pass

    # View-cast instead of calling the constructor: ``np.ndarray.__new__``
    # interprets its first positional argument as a *shape*, so
    # ``NdArraySubclass(np.arange(3))`` would allocate an uninitialised
    # array of shape (0, 1, 2) rather than wrap the values 0, 1, 2.
    original = DataArray(np.arange(3).view(NdArraySubclass))
    converted_not_subok = original.astype("d", subok=False)
    converted_subok = original.astype("d", subok=True)
    if not isinstance(original.data, NdArraySubclass):
        pytest.xfail("DataArray cannot be backed yet by a subclasses of np.ndarray")
    assert isinstance(converted_not_subok.data, np.ndarray)
    assert not isinstance(converted_not_subok.data, NdArraySubclass)
    assert isinstance(converted_subok.data, NdArraySubclass)

def test_is_null(self):
x = np.random.RandomState(42).randn(5, 6)
x[x < 0] = np.nan
Expand Down