Skip to content

Commit e5a9b60

Browse files
committed
Squashed commit of the following:
commit 398f1b6
Author: dcherian <[email protected]>
Date: Fri May 20 08:47:56 2022 -0600

    Backward compatibility dask

commit bde40e4
Merge: 0783df3 4cae8d0
Author: dcherian <[email protected]>
Date: Fri May 20 07:54:48 2022 -0600

    Merge branch 'main' into dask-datetime-to-numeric

    * main:
      concatenate docs style (pydata#6621)
      Typing for open_dataset/array/mfdataset and to_netcdf/zarr (pydata#6612)
      {full,zeros,ones}_like typing (pydata#6611)

commit 0783df3
Merge: 5cff4f1 8de7061
Author: dcherian <[email protected]>
Date: Sun May 15 21:03:50 2022 -0600

    Merge branch 'main' into dask-datetime-to-numeric

    * main: (24 commits)
      Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (pydata#6598)
      Enable flox in GroupBy and resample (pydata#5734)
      Add setuptools as dependency in ASV benchmark CI (pydata#6609)
      change polyval dim ordering (pydata#6601)
      re-add timedelta support for polyval (pydata#6599)
      Minor Dataset.map docstr clarification (pydata#6595)
      New inline_array kwarg for open_dataset (pydata#6566)
      Fix polyval overloads (pydata#6593)
      Restore old MultiIndex dropping behaviour (pydata#6592)
      [docs] add Dataset.assign_coords example (pydata#6336) (pydata#6558)
      Fix zarr append dtype checks (pydata#6476)
      Add missing space in exception message (pydata#6590)
      Doc Link to accessors list in extending-xarray.rst (pydata#6587)
      Fix Dataset/DataArray.isel with drop=True and scalar DataArray indexes (pydata#6579)
      Add some warnings about rechunking to the docs (pydata#6569)
      [pre-commit.ci] pre-commit autoupdate (pydata#6584)
      terminology.rst: fix link to Unidata's "netcdf_dataset_components" (pydata#6583)
      Allow string formatting of scalar DataArrays (pydata#5981)
      Fix mypy issues & reenable in tests (pydata#6581)
      polyval: Use Horner's algorithm + support chunked inputs (pydata#6548)
      ...

commit 5cff4f1
Merge: dfe200d 6144c61
Author: Maximilian Roos <[email protected]>
Date: Sun May 1 15:16:33 2022 -0700

    Merge branch 'main' into dask-datetime-to-numeric

commit dfe200d
Author: dcherian <[email protected]>
Date: Sun May 1 11:04:03 2022 -0600

    Minor cleanup

commit 35ed378
Author: dcherian <[email protected]>
Date: Sun May 1 10:57:36 2022 -0600

    Support dask arrays in datetime_to_numeric
1 parent ff5ad1e commit e5a9b60

File tree

2 files changed

+58
-13
lines changed

2 files changed

+58
-13
lines changed

xarray/core/duck_array_ops.py

+19-3
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,14 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
431431
# Compute timedelta object.
432432
# For np.datetime64, this can silently yield garbage due to overflow.
433433
# One option is to enforce 1970-01-01 as the universal offset.
434-
array = array - offset
434+
435+
# This map_blocks call is for backwards compatibility.
436+
# dask == 2021.04.1 does not support subtracting object arrays
437+
# which is required for cftime
438+
if is_duck_dask_array(array):
439+
array = array.map_blocks(lambda a, b: a - b, offset)
440+
else:
441+
array = array - offset
435442

436443
# Scalar is converted to 0d-array
437444
if not hasattr(array, "dtype"):
@@ -517,10 +524,19 @@ def pd_timedelta_to_float(value, datetime_unit):
517524
return np_timedelta64_to_float(value, datetime_unit)
518525

519526

527+
def _timedelta_to_seconds(array):
528+
return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
529+
530+
520531
def py_timedelta_to_float(array, datetime_unit):
521532
"""Convert a timedelta object to a float, possibly at a loss of resolution."""
522-
array = np.asarray(array)
523-
array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
533+
array = asarray(array)
534+
if is_duck_dask_array(array):
535+
array = array.map_blocks(
536+
_timedelta_to_seconds, meta=np.array([], dtype=np.float64)
537+
)
538+
else:
539+
array = _timedelta_to_seconds(array)
524540
conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit)
525541
return conversion_factor * array
526542

xarray/tests/test_duck_array_ops.py

+39-10
Original file line number | Diff line number | Diff line change
@@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func):
675675
assert_allclose(actual, expected)
676676

677677

678-
def test_datetime_to_numeric_datetime64():
678+
@pytest.mark.parametrize("dask", [True, False])
679+
def test_datetime_to_numeric_datetime64(dask):
680+
if dask and not has_dask:
681+
pytest.skip("requires dask")
682+
679683
times = pd.date_range("2000", periods=5, freq="7D").values
680-
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
684+
if dask:
685+
import dask.array
686+
687+
times = dask.array.from_array(times, chunks=-1)
688+
689+
with raise_if_dask_computes():
690+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
681691
expected = 24 * np.arange(0, 35, 7)
682692
np.testing.assert_array_equal(result, expected)
683693

684694
offset = times[1]
685-
result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h")
695+
with raise_if_dask_computes():
696+
result = duck_array_ops.datetime_to_numeric(
697+
times, offset=offset, datetime_unit="h"
698+
)
686699
expected = 24 * np.arange(-7, 28, 7)
687700
np.testing.assert_array_equal(result, expected)
688701

689702
dtype = np.float32
690-
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype)
703+
with raise_if_dask_computes():
704+
result = duck_array_ops.datetime_to_numeric(
705+
times, datetime_unit="h", dtype=dtype
706+
)
691707
expected = 24 * np.arange(0, 35, 7).astype(dtype)
692708
np.testing.assert_array_equal(result, expected)
693709

694710

695711
@requires_cftime
696-
def test_datetime_to_numeric_cftime():
712+
@pytest.mark.parametrize("dask", [True, False])
713+
def test_datetime_to_numeric_cftime(dask):
714+
if dask and not has_dask:
715+
pytest.skip("requires dask")
716+
697717
times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values
698-
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
718+
if dask:
719+
import dask.array
720+
721+
times = dask.array.from_array(times, chunks=-1)
722+
with raise_if_dask_computes():
723+
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
699724
expected = 24 * np.arange(0, 35, 7)
700725
np.testing.assert_array_equal(result, expected)
701726

702727
offset = times[1]
703-
result = duck_array_ops.datetime_to_numeric(
704-
times, offset=offset, datetime_unit="h", dtype=int
705-
)
728+
with raise_if_dask_computes():
729+
result = duck_array_ops.datetime_to_numeric(
730+
times, offset=offset, datetime_unit="h", dtype=int
731+
)
706732
expected = 24 * np.arange(-7, 28, 7)
707733
np.testing.assert_array_equal(result, expected)
708734

709735
dtype = np.float32
710-
result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype)
736+
with raise_if_dask_computes():
737+
result = duck_array_ops.datetime_to_numeric(
738+
times, datetime_unit="h", dtype=dtype
739+
)
711740
expected = 24 * np.arange(0, 35, 7).astype(dtype)
712741
np.testing.assert_array_equal(result, expected)
713742

0 commit comments

Comments
 (0)