From e26fa134e644ed37cd9cdfebb94cf4412c1ff8ee Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Mon, 23 Aug 2021 08:46:00 +0200 Subject: [PATCH 01/12] Fix bug on duck access without dask (#3391, #5715) Calls to dask functions are replaced by calls to the pycompat functions --- xarray/core/parallel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 20ec3608ebb..9a057920768 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -21,6 +21,7 @@ from .alignment import align from .dataarray import DataArray from .dataset import Dataset +from .pycompat import is_duck_dask_array try: import dask @@ -325,13 +326,13 @@ def _wrapper( raise TypeError("kwargs must be a mapping (for example, a dict)") for value in kwargs.values(): - if dask.is_dask_collection(value): + if is_duck_dask_array(value): raise TypeError( "Cannot pass dask collections in kwargs yet. Please compute or " "load values before passing to map_blocks." ) - if not dask.is_dask_collection(obj): + if not is_duck_dask_array(obj): return func(obj, *args, **kwargs) all_args = [obj] + list(args) From 908943833c845d6a9ee977ae73f2414846337e34 Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Mon, 23 Aug 2021 10:45:45 +0200 Subject: [PATCH 02/12] Test if lazy corr is equal to corr --- xarray/tests/test_computation.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 2439ea30b4b..b2d1d270e42 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1554,6 +1554,22 @@ def test_covcorr_consistency(da_a, da_b, dim): assert_allclose(actual, expected) +@requires_dask +@pytest.mark.parametrize( + "da_a, da_b", + arrays_w_tuples()[1], +) +@pytest.mark.parametrize("dim", [None, "time", "x"]) +def test_cov_lazycov_consistency(da_a, da_b, dim): + da_al = da_a.chunk() + da_bl = da_b.chunk() + c_abl = xr.corr(da_al, da_bl) + c_ab = xr.corr(da_a, da_b) + c_ab_mixed = xr.corr(da_a, da_bl) + assert_allclose(c_ab, c_abl) + assert_allclose(c_ab, c_ab_mixed) + + @pytest.mark.parametrize( "da_a", arrays_w_tuples()[0], From ee7a65bb0443598bab2599e5b83c23db25a83c13 Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Mon, 23 Aug 2021 17:33:16 +0200 Subject: [PATCH 03/12] Create is_dask_collection function The fonction is used to test for lazy datasets in map_blocks (parallel.py) --- xarray/core/parallel.py | 6 +++--- xarray/core/pycompat.py | 8 ++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 9a057920768..3cd130c74c3 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -21,7 +21,7 @@ from .alignment import align from .dataarray import DataArray from .dataset import Dataset -from .pycompat import is_duck_dask_array +from .pycompat import is_dask_collection try: import dask @@ -326,13 +326,13 @@ def _wrapper( raise TypeError("kwargs must be a mapping (for example, a dict)") for value in kwargs.values(): - if is_duck_dask_array(value): + if is_dask_collection(value): raise TypeError( "Cannot pass dask collections in kwargs yet. Please compute or " "load values before passing to map_blocks." ) - if not is_duck_dask_array(obj): + if not is_dask_collection(obj): return func(obj, *args, **kwargs) all_args = [obj] + list(args) diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index d1649235006..d95dced9ddf 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -43,15 +43,19 @@ def __init__(self, mod): self.available = duck_array_module is not None -def is_duck_dask_array(x): +def is_dask_collection(x): if DuckArrayModule("dask").available: from dask.base import is_dask_collection - return is_duck_array(x) and is_dask_collection(x) + return is_dask_collection(x) else: return False +def is_duck_dask_array(x): + return is_duck_array(x) and is_dask_collection(x) + + dsk = DuckArrayModule("dask") dask_version = dsk.version dask_array_type = dsk.type From a96154a85e330ce5aec6c34e49997833c1ae0ba2 Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Tue, 24 Aug 2021 09:14:45 +0200 Subject: [PATCH 04/12] Correct test function name --- xarray/tests/test_computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index b2d1d270e42..5d2210abefa 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1560,7 +1560,7 @@ def test_covcorr_consistency(da_a, da_b, dim): arrays_w_tuples()[1], ) @pytest.mark.parametrize("dim", [None, "time", "x"]) -def test_cov_lazycov_consistency(da_a, da_b, dim): +def test_corr_lazycorr_consistency(da_a, da_b, dim): da_al = da_a.chunk() da_bl = da_b.chunk() c_abl = xr.corr(da_al, da_bl) From bd30f076a33336765c3e4e13f1f4d5ad1f2963ce Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Mon, 30 Aug 2021 18:36:07 +0200 Subject: [PATCH 05/12] Remove dask importskip The importskip was canceling all tests if dask is not present. Some tests are relevant to be done without dask. @require_dask was added to one test since it import dask directly --- xarray/tests/test_computation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 5d2210abefa..ac77ffe0ab6 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -24,8 +24,6 @@ from . import has_dask, raise_if_dask_computes, requires_dask -dask = pytest.importorskip("dask") - def assert_identical(a, b): """A version of this function which accepts numpy arrays""" @@ -1420,6 +1418,7 @@ def arrays_w_tuples(): ], ) @pytest.mark.parametrize("dim", [None, "x", "time"]) +@requires_dask def test_lazy_corrcov(da_a, da_b, dim, ddof): # GH 5284 from dask import is_dask_collection From dbf5eec86ab36a2e4c195b62caa536aaaac46129 Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Tue, 31 Aug 2021 11:49:47 +0200 Subject: [PATCH 06/12] Remove tests for arrays 5 and 6 The tests do not pass for arrays 5 and 6. More work is needed to indentify why --- xarray/tests/test_computation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index ac77ffe0ab6..6033cfb53af 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1556,7 +1556,11 @@ def test_covcorr_consistency(da_a, da_b, dim): @requires_dask @pytest.mark.parametrize( "da_a, da_b", - arrays_w_tuples()[1], + [ + arrays_w_tuples()[1][i] + for i in range(len(arrays_w_tuples()[1])) + if i not in [5, 6] + ], # TODO: arrays 5 and 6 make errors, why ? ) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr_lazycorr_consistency(da_a, da_b, dim): From b9dfbe66ab12eeab40dc91d396986591393a3fdf Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Tue, 31 Aug 2021 12:08:56 +0200 Subject: [PATCH 07/12] Add changes to what-new.rst --- doc/whats-new.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4f79a37eb4b..7f4b5bb7f9d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,7 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix bug on duck access without dask which was impacting correlation computation Documentation ~~~~~~~~~~~~~ From 39cffd1a791c2c1bf90b7ee0b30bc79ef51837c4 Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Tue, 31 Aug 2021 17:16:44 +0200 Subject: [PATCH 08/12] Return None --- xarray/tests/test_computation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index d68af52f728..9322cbc57b0 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1563,7 +1563,7 @@ def test_covcorr_consistency(da_a, da_b, dim) -> None: ], # TODO: arrays 5 and 6 make errors, why ? ) @pytest.mark.parametrize("dim", [None, "time", "x"]) -def test_corr_lazycorr_consistency(da_a, da_b, dim): +def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: da_al = da_a.chunk() da_bl = da_b.chunk() c_abl = xr.corr(da_al, da_bl) From 3dde39b1f1f6c406740555c7a55ab1803ef52aed Mon Sep 17 00:00:00 2001 From: Guillaume Chanel Date: Tue, 31 Aug 2021 17:19:50 +0200 Subject: [PATCH 09/12] fix - add dimension to tests --- xarray/tests/test_computation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 9322cbc57b0..7378882932b 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1566,9 +1566,9 @@ def test_covcorr_consistency(da_a, da_b, dim) -> None: def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: da_al = da_a.chunk() da_bl = da_b.chunk() - c_abl = xr.corr(da_al, da_bl) - c_ab = xr.corr(da_a, da_b) - c_ab_mixed = xr.corr(da_a, da_bl) + c_abl = xr.corr(da_al, da_bl, dim=dim) + c_ab = xr.corr(da_a, da_b, dim=dim) + c_ab_mixed = xr.corr(da_a, da_bl, dim=dim) assert_allclose(c_ab, c_abl) assert_allclose(c_ab, c_ab_mixed) From 2d86f2c2190f487d16a5d62c21be89be10e005c4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 23 Nov 2021 20:07:43 -0700 Subject: [PATCH 10/12] update whats-new --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d25821fa24d..62c72ecd1c3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,7 @@ Deprecations Bug fixes ~~~~~~~~~ +- Fix bug on duck access without dask which was impacting correlation computation. By `Gijom `_. - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`). By `Sebastian Weigand `_. - Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) @@ -158,7 +159,6 @@ Deprecations Bug fixes ~~~~~~~~~ -- Fix bug on duck access without dask which was impacting correlation computation - Fix ZeroDivisionError from saving dask array with empty dimension (:issue: `5741`). By `Joseph K Aicher `_. - Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not From 32545a394734b2dd0cc82df7941cf84969198780 Mon Sep 17 00:00:00 2001 From: dcherian Date: Tue, 23 Nov 2021 20:16:15 -0700 Subject: [PATCH 11/12] bugfix --- doc/whats-new.rst | 3 ++- xarray/core/computation.py | 3 ++- xarray/tests/test_computation.py | 9 +++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 62c72ecd1c3..9479258f81c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,7 +34,8 @@ Deprecations Bug fixes ~~~~~~~~~ -- Fix bug on duck access without dask which was impacting correlation computation. By `Gijom `_. +- :py:func:`xr.corr` works when dask is not installed. + By `Gijom `_. - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`). By `Sebastian Weigand `_. - Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 0c21ca07744..04fda5a7cb3 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1359,7 +1359,8 @@ def _get_valid_values(da, other): da = da.where(~missing_vals) return da else: - return da + # ensure consistent return dtype + return da.astype(float) da_a = da_a.map_blocks(_get_valid_values, args=[da_b]) da_b = da_b.map_blocks(_get_valid_values, args=[da_a]) diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 7378882932b..e4e67f756d8 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1573,6 +1573,15 @@ def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: assert_allclose(c_ab, c_ab_mixed) +@requires_dask +def test_corr_dtype_error(): + da_a = xr.DataArray([[1, 2], [2, 1]], dims=["x", "time"]) + da_b = xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]) + + xr.testing.assert_equal(xr.corr(da_a, da_b), xr.corr(da_a.chunk(), da_b.chunk())) + xr.testing.assert_equal(xr.corr(da_a, da_b), xr.corr(da_a, da_b.chunk())) + + @pytest.mark.parametrize( "da_a", arrays_w_tuples()[0], From 52d82c92455b20887d9facb8188ee21e377313b8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 24 Nov 2021 09:07:07 -0700 Subject: [PATCH 12/12] Apply suggestions from code review Co-authored-by: Mathias Hauser --- doc/whats-new.rst | 2 +- xarray/tests/test_computation.py | 9 +-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9479258f81c..eb61cd154cf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,7 +34,7 @@ Deprecations Bug fixes ~~~~~~~~~ -- :py:func:`xr.corr` works when dask is not installed. +- :py:func:`xr.map_blocks` and :py:func:`xr.corr` now work when dask is not installed (:issue:`3391`, :issue:`5715`, :pull:`5731`). By `Gijom `_. - Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`). By `Sebastian Weigand `_. diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index e4e67f756d8..8af7604cae5 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -1554,14 +1554,7 @@ def test_covcorr_consistency(da_a, da_b, dim) -> None: @requires_dask -@pytest.mark.parametrize( - "da_a, da_b", - [ - arrays_w_tuples()[1][i] - for i in range(len(arrays_w_tuples()[1])) - if i not in [5, 6] - ], # TODO: arrays 5 and 6 make errors, why ? -) +@pytest.mark.parametrize("da_a, da_b", arrays_w_tuples()[1]) @pytest.mark.parametrize("dim", [None, "time", "x"]) def test_corr_lazycorr_consistency(da_a, da_b, dim) -> None: da_al = da_a.chunk()