From 2c18fe87c1d8d8297f0f2b958257a4d1ac4c955a Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 15:46:14 -0500 Subject: [PATCH 01/13] Skipna in interp_on_quantiles --- xclim/sdba/utils.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 2b92739c5..3d2eeedb7 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -435,17 +435,20 @@ def _interp_quantiles_1D(newx, oldx, oldy): ) return newx - mask = np.isnan(oldx) & np.isnan(oldy) + mask_old = np.isnan(oldy) # The bounds might be NaN for a good reason. - mask[np.array([0, -1])] = False - - return interp1d( - oldx[~mask], - oldy[~mask], + mask_old[np.array([0, -1])] = False + mask_old = mask_old | np.isnan(oldx) + + mask_new = np.isnan(newx) + out = np.full_like(newx, np.NaN) + out[~mask_new] = interp1d( + oldx[~mask_old], + oldy[~mask_old], bounds_error=False, kind=method, fill_value=fill_value, - )(newx) + )(newx[~mask_new]) if "group" in xq.dims: xq = xq.squeeze("group", drop=True) @@ -474,13 +477,16 @@ def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa ) return _newx - mask = np.isnan(_oldx) & np.isnan(_oldy) & np.isnan(_oldg) - mask[:, np.array([0, -1])] = False # bounds might be NaN for a good reason. + mask_old = np.isnan(_oldy) + mask_old[:, np.array([0, -1])] = False # bounds might be NaN for a good reason. + mask_old = mask_old | np.isnan(_oldx) | np.isnan(_oldg) - return griddata( + mask_new = np.isnan(_newx) | np.isnan(_newg) + out = np.full_like(_newx, np.NaN) + out[~mask_new] = griddata( (_oldx[~mask], _oldg[~mask]), _oldy[~mask], - (_newx, _newg), + (_newx[~mask_new], _newg[~mask_new]), method=method, ) From 995ea62706f599ef1f7e8af5509cd53d0b963b01 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 16:01:58 -0500 Subject: [PATCH 02/13] Full skipna --- xclim/sdba/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 3d2eeedb7..d5291d449 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -449,6 +449,7 @@ def _interp_quantiles_1D(newx, oldx, oldy): kind=method, fill_value=fill_value, )(newx[~mask_new]) + return out if "group" in xq.dims: xq = xq.squeeze("group", drop=True) @@ -462,7 +463,7 @@ def _interp_quantiles_1D(newx, oldx, oldy): output_core_dims=[[dim]], vectorize=True, dask="parallelized", - output_dtypes=[float], + output_dtypes=[yq.dtype], ) # else: @@ -484,11 +485,12 @@ def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa mask_new = np.isnan(_newx) | np.isnan(_newg) out = np.full_like(_newx, np.NaN) out[~mask_new] = griddata( - (_oldx[~mask], _oldg[~mask]), - _oldy[~mask], + (_oldx[~mask_old], _oldg[~mask_old]), + _oldy[~mask_old], (_newx[~mask_new], _newg[~mask_new]), method=method, ) + return out xq = add_cyclic_bounds(xq, prop, cyclic_coords=False) yq = add_cyclic_bounds(yq, prop, cyclic_coords=False) From 749f776b3b7d9262cae6b5cd1fc88037d62a5a4e Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 16:08:01 -0500 Subject: [PATCH 03/13] Fixed dtype --- xclim/sdba/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index d5291d449..6c6741b4d 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -441,7 +441,7 @@ def _interp_quantiles_1D(newx, oldx, oldy): mask_old = mask_old | np.isnan(oldx) mask_new = np.isnan(newx) - out = np.full_like(newx, np.NaN) + out = np.full_like(newx, np.NaN, dtype=oldy.dtype) out[~mask_new] = interp1d( oldx[~mask_old], oldy[~mask_old], @@ -483,7 +483,7 @@ def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa mask_old = mask_old | np.isnan(_oldx) | np.isnan(_oldg) mask_new = np.isnan(_newx) | np.isnan(_newg) - out = np.full_like(_newx, np.NaN) + out = np.full_like(_newx, np.NaN, dtype=_oldy.dtype) out[~mask_new] = griddata( (_oldx[~mask_old], _oldg[~mask_old]), _oldy[~mask_old], From 85ec302c92187fe72e5264ef31de924236b4c9c5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 16:57:20 -0500 Subject: [PATCH 04/13] Added test --- xclim/sdba/utils.py | 35 +++++++++-------- xclim/testing/tests/test_sdba/test_utils.py | 42 +++++++++++++++++++++ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 6c6741b4d..eaaebb0f9 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -403,7 +403,10 @@ def interp_on_quantiles( ): """Interpolate values of yq on new values of x. - Interpolate in 2D if grouping is used, in 1D otherwise. + Interpolate in 2D with :py:func:`~scipy.interpolate.griddata` if grouping is used, + in 1D otherwise, with :py:func:`~scipy.interpolate.interp1d`. Except for the + boundaries as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`, any NaNs in xq + or yq are removed from the input map. Similarly, NaNs in newx are left NaNs. Parameters ---------- @@ -417,18 +420,19 @@ def interp_on_quantiles( If it does, interpolation is done in 2D on "quantiles" and on the group dimension. group : Union[str, Grouper] The dimension and grouping information. (ex: "time" or "time.month"). - Defaults to the "group" attribute of xq, or "time" if there is none. + Defaults to "time". method : {'nearest', 'linear', 'cubic'} - The interpolation method. + The interpolation method. Using cubic interpolation will NOT respect the constant + extrapolation as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`. """ dim = group.dim prop = group.prop if prop == "group": - fill_value = "extrapolate" if method == "nearest" else np.nan def _interp_quantiles_1D(newx, oldx, oldy): - if np.all(np.isnan(newx)): + mask_new = np.isnan(newx) + if np.all(mask_new): warn( "All-NaN slice encountered in interp_on_quantiles", category=RuntimeWarning, @@ -440,15 +444,10 @@ def _interp_quantiles_1D(newx, oldx, oldy): mask_old[np.array([0, -1])] = False mask_old = mask_old | np.isnan(oldx) - mask_new = np.isnan(newx) out = np.full_like(newx, np.NaN, dtype=oldy.dtype) - out[~mask_new] = interp1d( - oldx[~mask_old], - oldy[~mask_old], - bounds_error=False, - kind=method, - fill_value=fill_value, - )(newx[~mask_new]) + out[~mask_new] = interp1d(oldx[~mask_old], oldy[~mask_old], kind=method)( + newx[~mask_new] + ) return out if "group" in xq.dims: @@ -468,21 +467,21 @@ def _interp_quantiles_1D(newx, oldx, oldy): # else: def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa - if method != "nearest": - _oldx = np.clip(_oldx, _newx.min() - 1, _newx.max() + 1) - - if np.all(np.isnan(_newx)): + mask_new = np.isnan(_newx) | np.isnan(_newg) + if np.all(mask_new): warn( "All-NaN slice encountered in interp_on_quantiles", category=RuntimeWarning, ) return _newx + if method != "nearest": + _oldx = np.clip(_oldx, _newx.min() - 1, _newx.max() + 1) + mask_old = np.isnan(_oldy) mask_old[:, np.array([0, -1])] = False # bounds might be NaN for a good reason. mask_old = mask_old | np.isnan(_oldx) | np.isnan(_oldg) - mask_new = np.isnan(_newx) | np.isnan(_newg) out = np.full_like(_newx, np.NaN, dtype=_oldy.dtype) out[~mask_new] = griddata( (_oldx[~mask_old], _oldg[~mask_old]), diff --git a/xclim/testing/tests/test_sdba/test_utils.py b/xclim/testing/tests/test_sdba/test_utils.py index f7097fb40..8fcbce75e 100644 --- a/xclim/testing/tests/test_sdba/test_utils.py +++ b/xclim/testing/tests/test_sdba/test_utils.py @@ -113,6 +113,48 @@ def test_interp_on_quantiles(shape, group, method): xr.testing.assert_equal(fut_corr.isnull(), fut == 1000) +@pytest.mark.parametrize("group", ["time", "time.month"]) +@pytest.mark.parametrize("method", ["nearest", "linear", "cubic"]) +def test_interp_on_quantiles_nans(group, method): + quantiles = np.linspace(0, 1, num=13) + xq = xr.DataArray( + np.concatenate(([0], np.linspace(205, 215, num=11), [1000])), + dims=("quantiles",), + coords={"quantiles": quantiles}, + ) + + yq = xr.DataArray( + np.concatenate(([2.0], np.linspace(2, 4, num=11), [4])), + dims=("quantiles",), + coords={"quantiles": quantiles}, + ) + + if group == "time.month": + xq = xq.expand_dims(month=np.arange(12) + 1) + yq = yq.expand_dims(month=np.arange(12) + 1) + + newx = xr.DataArray( + np.linspace(240, 200, num=30), + dims=("time",), + coords={"time": xr.cftime_range("1900-03-01", freq="D", periods=30)}, + ) + newx = newx.where(newx > 201) # Put some NaNs in newx + + out = u.interp_on_quantiles(newx, xq, yq, group=group, method=method) + + if method != "cubic": + assert out[0] == 4 + assert out[-1].isnull().item() + + xq = xq.where(xq != 214) + yq = yq.where(yq != 3) + out = u.interp_on_quantiles(newx, xq, yq, group=group, method=method) + + if method != "cubic": + assert out[0] == 4 + assert out[-1].isnull().item() + + def test_rank(): arr = np.random.random_sample(size=(10, 10, 1000)) da = xr.DataArray(arr, dims=("x", "y", "time")) From ccf0473ed613757f0a479907b225a496f4dbe1a7 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 16:59:38 -0500 Subject: [PATCH 05/13] upd hist --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index b7d034974..d316d7b1e 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -36,6 +36,7 @@ Bug fixes ^^^^^^^^^ * Fix bugs in the `cf_attrs` and/or `abstract` of `continuous_snow_cover_end` and `continuous_snow_cover_start`. (:pull:`908`). * Remove unnecessary `keep_attrs` from `resample` call which would raise an error in futur Xarray version. (:pull:`937`). +* Skip all missing values in ``xclim.sdba.utils.interp_on_quantiles``, drop them from both the old and new coordinates, as well as from the old values. 0.31.0 (2021-11-05) ------------------- From 8ba486acc0dcba90b2e087e4f05a38cacbbafc6b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 17:11:22 -0500 Subject: [PATCH 06/13] pretty doc --- xclim/sdba/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index eaaebb0f9..db3605350 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -404,7 +404,7 @@ def interp_on_quantiles( """Interpolate values of yq on new values of x. Interpolate in 2D with :py:func:`~scipy.interpolate.griddata` if grouping is used, - in 1D otherwise, with :py:func:`~scipy.interpolate.interp1d`. Except for the + in 1D otherwise, with :py:class:`~scipy.interpolate.interp1d`. Except for the boundaries as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`, any NaNs in xq or yq are removed from the input map. Similarly, NaNs in newx are left NaNs. From fae635d7409a9edf0a9c4b69fe9f75006fe736d1 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 17:21:29 -0500 Subject: [PATCH 07/13] upd hist --- HISTORY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index d316d7b1e..8590acf51 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -36,7 +36,7 @@ Bug fixes ^^^^^^^^^ * Fix bugs in the `cf_attrs` and/or `abstract` of `continuous_snow_cover_end` and `continuous_snow_cover_start`. (:pull:`908`). * Remove unnecessary `keep_attrs` from `resample` call which would raise an error in futur Xarray version. (:pull:`937`). -* Skip all missing values in ``xclim.sdba.utils.interp_on_quantiles``, drop them from both the old and new coordinates, as well as from the old values. +* Skip all missing values in ``xclim.sdba.utils.interp_on_quantiles``, drop them from both the old and new coordinates, as well as from the old values. (:pull:`941`). 0.31.0 (2021-11-05) ------------------- From c35c1878d59009ba248969933506edfb169de4b9 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 29 Nov 2021 17:54:53 -0500 Subject: [PATCH 08/13] Put back NaN when outside --- xclim/sdba/utils.py | 15 ++++++++++++--- xclim/testing/tests/test_sdba/test_utils.py | 13 +++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index db3605350..5c01b818c 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -408,6 +408,8 @@ def interp_on_quantiles( boundaries as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`, any NaNs in xq or yq are removed from the input map. Similarly, NaNs in newx are left NaNs. + Values of newx outside the range of xq will return as NaNs. + Parameters ---------- newx : xr.DataArray @@ -445,9 +447,13 @@ def _interp_quantiles_1D(newx, oldx, oldy): mask_old = mask_old | np.isnan(oldx) out = np.full_like(newx, np.NaN, dtype=oldy.dtype) - out[~mask_new] = interp1d(oldx[~mask_old], oldy[~mask_old], kind=method)( - newx[~mask_new] - ) + out[~mask_new] = interp1d( + oldx[~mask_old], + oldy[~mask_old], + kind=method, + bounds_error=False, + fill_value=np.NaN, + )(newx[~mask_new]) return out if "group" in xq.dims: @@ -489,6 +495,9 @@ def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa (_newx[~mask_new], _newg[~mask_new]), method=method, ) + + if method == "nearest": # It is implicit for other methods + out[(_newx < _oldx.min()) | (_newx > _oldx.max())] = np.NaN return out xq = add_cyclic_bounds(xq, prop, cyclic_coords=False) diff --git a/xclim/testing/tests/test_sdba/test_utils.py b/xclim/testing/tests/test_sdba/test_utils.py index 8fcbce75e..d94a80dbc 100644 --- a/xclim/testing/tests/test_sdba/test_utils.py +++ b/xclim/testing/tests/test_sdba/test_utils.py @@ -103,14 +103,11 @@ def test_interp_on_quantiles(shape, group, method): *("time", "lat", "lon")[: len(shape)] ) - if method == "nearest": - np.testing.assert_allclose(fut_corr.values, obs.values, rtol=0.3) - assert fut_corr.isnull().sum() == 0 - else: - np.testing.assert_allclose( - fut_corr.values, obs.where(fut != 1000).values, rtol=2e-3 - ) - xr.testing.assert_equal(fut_corr.isnull(), fut == 1000) + rtol = 0.02 if method == "nearest" else 0.002 + np.testing.assert_allclose( + fut_corr.values, obs.where(fut != 1000).values, rtol=rtol + ) + xr.testing.assert_equal(fut_corr.isnull(), fut == 1000) @pytest.mark.parametrize("group", ["time", "time.month"]) From 731ade6c004afc47b988998a4920241c676e6f29 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 2 Dec 2021 08:48:18 -0500 Subject: [PATCH 09/13] Began refactor of interp_on_quantiles --- xclim/sdba/base.py | 36 ++--------- xclim/sdba/utils.py | 151 ++++++++++++++++++++++++++------------------ 2 files changed, 95 insertions(+), 92 deletions(-) diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py index 85b0dae60..1d1d2e21a 100644 --- a/xclim/sdba/base.py +++ b/xclim/sdba/base.py @@ -105,7 +105,6 @@ def __init__( group: str, window: int = 1, add_dims: Optional[Union[Sequence[str], Set[str]]] = None, - interp: Union[bool, str] = False, ): """Create the Grouper object. @@ -122,20 +121,12 @@ def __init__( Additional dimensions that should be reduced in grouping operations. This behaviour is also controlled by the `main_only` parameter of the `apply` method. If any of these dimensions are absent from the dataarrays, they will be omitted. - interp : Union[bool, str] - Whether to return an interpolatable index in the `get_index` method. Only effective for `month` grouping. - Interpolation method names are accepted for convenience, "nearest" is translated to False, all other names - are translated to True. - This modifies the default, but `get_index` also accepts an `interp` argument overriding the one defined here.. """ if "." in group: dim, prop = group.split(".") else: dim, prop = group, "group" - if isinstance(interp, str): - interp = interp != "nearest" - if isinstance(add_dims, str): add_dims = [add_dims] @@ -146,7 +137,6 @@ def __init__( prop=prop, name=group, window=window, - interp=interp, ) @classmethod @@ -155,7 +145,6 @@ def from_kwargs(cls, **kwargs): group=kwargs.pop("group"), window=kwargs.pop("window", 1), add_dims=kwargs.pop("add_dims", []), - interp=kwargs.get("interp", False), ) return kwargs @@ -239,7 +228,7 @@ def group( def get_index( self, da: Union[xr.DataArray, xr.Dataset], - interp: Optional[Union[bool, str]] = None, + interp: Optional[bool] = None, ): """Return the group index of each element along the main dimension. @@ -248,9 +237,9 @@ def get_index( da : Union[xr.DataArray, xr.Dataset] The input array/dataset for which the group index is returned. It must have Grouper.dim as a coordinate. - interp : Union[bool, str] - Argument `interp` defaults to `self.interp`. If True, the returned index can be - used for interpolation. For month grouping, integer values represent the middle of the month, all other + interp : bool, optional + If True, the returned index can be used for interpolation. Only value for month + grouping, where integer values represent the middle of the month, all other days are linearly interpolated in between. Returns @@ -275,21 +264,8 @@ def get_index( f"Index {self.name} is not of type int (rather {i.dtype}), but {self.__class__.__name__} requires integer indexes." ) - interp = ( - (interp or self.interp) - if not isinstance(interp, str) - else interp != "nearest" - ) - if interp: - if self.dim == "time": - if self.prop == "month": - i = ind.month - 0.5 + ind.day / ind.days_in_month - elif self.prop == "dayofyear": - i = ind.dayofyear - else: - raise NotImplementedError - else: - raise NotImplementedError + if interp and self.dim == "time" and self.prop == "month": + i = ind.month - 0.5 + ind.day / ind.days_in_month xi = xr.DataArray( i, diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 5c01b818c..911259e21 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -392,6 +392,70 @@ def add_endpoints( return ensure_chunk_size(out, **{dim: -1}) +def _interp_on_quantiles_1D(newx, oldx, oldy, method, extrap): + mask_new = np.isnan(newx) + if np.all(mask_new): + warn( + "All-NaN slice encountered in interp_on_quantiles", + category=RuntimeWarning, + ) + return newx + + if extrap == "constant": + # This is not needed with nearest, as it is the default behaviour + fill_value = (oldy[0], oldy[-1]) + else: # extrap == 'nan' + fill_value = np.NaN + + mask_old = np.isnan(oldy) | np.isnan(oldx) + + out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") + out[~mask_new] = interp1d( + oldx[~mask_old], + oldy[~mask_old], + kind=method, + bounds_error=False, + fill_value=fill_value, + )(newx[~mask_new]) + return out + + +def _interp_on_quantiles_2D(newx, newg, oldx, oldy, oldg, method, extrap): # noqa + mask_new = np.isnan(newx) | np.isnan(newg) + if np.all(mask_new): + warn( + "All-NaN slice encountered in interp_on_quantiles", + category=RuntimeWarning, + ) + return newx + + mask_old = np.isnan(oldy) | np.isnan(oldx) | np.isnan(oldg) + + out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") + out[~mask_new] = griddata( + (oldx[~mask_old], oldg[~mask_old]), + oldy[~mask_old], + (newx[~mask_new], newg[~mask_new]), + method=method, + ) + + def first_and_last_nonnull(vector): + return vector[np.where(~np.isnan(vector))[0][np.array([0, -1])]] + + if (method == "nearest" and extrap == "nan") or extrap == "constant": + igrp = np.searchsorted(oldg[:, 0], np.round(newg).astype(int)) + toolow = newx < np.nanmin(oldx, axis=-1)[igrp] + toohigh = newx > np.nanmax(oldx, axis=-1)[igrp] + if extrap == "constant": + out[toolow] = oldy[igrp, 0][toolow] + out[toohigh] = oldy[igrp, -1][toohigh] + else: + out[toolow] = np.NaN + out[toohigh] = np.NaN + + return out + + @parse_group def interp_on_quantiles( newx: xr.DataArray, @@ -400,16 +464,14 @@ def interp_on_quantiles( *, group: Union[str, Grouper] = "time", method: str = "linear", + extrapolation: str = "constant", ): """Interpolate values of yq on new values of x. Interpolate in 2D with :py:func:`~scipy.interpolate.griddata` if grouping is used, - in 1D otherwise, with :py:class:`~scipy.interpolate.interp1d`. Except for the - boundaries as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`, any NaNs in xq + in 1D otherwise, with :py:class:`~scipy.interpolate.interp1d`. Any NaNs in xq or yq are removed from the input map. Similarly, NaNs in newx are left NaNs. - Values of newx outside the range of xq will return as NaNs. - Parameters ---------- newx : xr.DataArray @@ -424,46 +486,34 @@ def interp_on_quantiles( The dimension and grouping information. (ex: "time" or "time.month"). Defaults to "time". method : {'nearest', 'linear', 'cubic'} - The interpolation method. Using cubic interpolation will NOT respect the constant - extrapolation as added by :py:func:`~xclim.sdba.utils.extrapolate_qm`. + The interpolation method. + extrapolation : {'constant', 'nan'} + The extrapolation method used for values of `newx` outside the range of `xq`. + See notes. + + Notes + ----- + Extrapolation methods: + + - 'nan' : Any value of `newx` outside the range of `xq` is set to NaN. + - 'constant' : Values of `newx` smaller than the minimum of `xq` are set to the first + value of `yq` and those larger than the maximum, set to the last one (first and + last values along the "quantiles" dimension). """ dim = group.dim prop = group.prop if prop == "group": - - def _interp_quantiles_1D(newx, oldx, oldy): - mask_new = np.isnan(newx) - if np.all(mask_new): - warn( - "All-NaN slice encountered in interp_on_quantiles", - category=RuntimeWarning, - ) - return newx - - mask_old = np.isnan(oldy) - # The bounds might be NaN for a good reason. - mask_old[np.array([0, -1])] = False - mask_old = mask_old | np.isnan(oldx) - - out = np.full_like(newx, np.NaN, dtype=oldy.dtype) - out[~mask_new] = interp1d( - oldx[~mask_old], - oldy[~mask_old], - kind=method, - bounds_error=False, - fill_value=np.NaN, - )(newx[~mask_new]) - return out - if "group" in xq.dims: xq = xq.squeeze("group", drop=True) yq = yq.squeeze("group", drop=True) + return xr.apply_ufunc( - _interp_quantiles_1D, + _interp_on_quantiles_1D, newx, xq, yq, + kwargs={"method": method, "extrap": extrapolation}, input_core_dims=[[dim], ["quantiles"], ["quantiles"]], output_core_dims=[[dim]], vectorize=True, @@ -471,47 +521,24 @@ def _interp_quantiles_1D(newx, oldx, oldy): output_dtypes=[yq.dtype], ) # else: - - def _interp_quantiles_2D(_newx, _newg, _oldx, _oldy, _oldg): # noqa - mask_new = np.isnan(_newx) | np.isnan(_newg) - if np.all(mask_new): - warn( - "All-NaN slice encountered in interp_on_quantiles", - category=RuntimeWarning, - ) - return _newx - - if method != "nearest": - _oldx = np.clip(_oldx, _newx.min() - 1, _newx.max() + 1) - - mask_old = np.isnan(_oldy) - mask_old[:, np.array([0, -1])] = False # bounds might be NaN for a good reason. - mask_old = mask_old | np.isnan(_oldx) | np.isnan(_oldg) - - out = np.full_like(_newx, np.NaN, dtype=_oldy.dtype) - out[~mask_new] = griddata( - (_oldx[~mask_old], _oldg[~mask_old]), - _oldy[~mask_old], - (_newx[~mask_new], _newg[~mask_new]), - method=method, - ) - - if method == "nearest": # It is implicit for other methods - out[(_newx < _oldx.min()) | (_newx > _oldx.max())] = np.NaN - return out + if prop not in xq.dims: + xq = xq.expand_dims({prop: group.get_coordinate()}) + if prop not in yq.dims: + yq = yq.expand_dims({prop: group.get_coordinate()}) xq = add_cyclic_bounds(xq, prop, cyclic_coords=False) yq = add_cyclic_bounds(yq, prop, cyclic_coords=False) - newg = group.get_index(newx) + newg = group.get_index(newx, interp=method != "nearest") oldg = xq[prop].expand_dims(quantiles=xq.coords["quantiles"]) return xr.apply_ufunc( - _interp_quantiles_2D, + _interp_on_quantiles_2D, newx, newg, xq, yq, oldg, + kwargs={"method": method, "extrap": extrapolation}, input_core_dims=[ [dim], [dim], From 0f990e7ccfe541018d82d686fc1ad7a5d3582dce Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 2 Dec 2021 14:30:27 -0500 Subject: [PATCH 10/13] refactor interp_on_quantiles --- xclim/sdba/nbutils.py | 38 ++++++++- xclim/sdba/utils.py | 20 +---- xclim/testing/tests/test_sdba/test_utils.py | 88 ++++++++------------- xclim/testing/tests/test_utils.py | 1 - 4 files changed, 74 insertions(+), 73 deletions(-) diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py index 324f56e41..68ca1aa8a 100644 --- a/xclim/sdba/nbutils.py +++ b/xclim/sdba/nbutils.py @@ -3,7 +3,7 @@ --------------------------- """ import numpy as np -from numba import boolean, float32, float64, guvectorize, njit +from numba import boolean, float32, float64, guvectorize, int64, njit from xarray import DataArray from xarray.core import utils @@ -172,3 +172,39 @@ def _escore(tgt, sim, out): w = n1 * n2 / (n1 + n2) out[0] = w * (sXY + sXY - sXX - sYY) / 2 + + +@njit +def _first_and_last_nonnull(arr): + """For each row of arr, get the first and last non NaN elements.""" + out = np.empty((arr.shape[0], 2)) + for i in range(arr.shape[0]): + idxs = np.where(~np.isnan(arr[i]))[0] + if idxs.size > 0: + out[i] = arr[i][idxs[np.array([0, -1])]] + else: + out[i] = np.array([np.NaN, np.NaN]) + return out + + +@njit +def _extrapolate_on_quantiles(interp, oldx, oldg, oldy, newx, newg, method="constant"): + """Apply extrapolation to the output of interpolation on quantiles with a given + grouping. Arguments are the same as _interp_on_quantiles_2D. + + "constant" extrapolation is done independently for each group. + """ + igrp = np.empty_like(newg) + np.around(newg, 0, igrp) + igrp = igrp.astype(int64) + bnds = _first_and_last_nonnull(oldx) + toolow = newx < bnds[:, 0][igrp] + toohigh = newx > bnds[:, 1][igrp] + if method == "constant": + constants = _first_and_last_nonnull(oldy) + interp[toolow] = constants[igrp, 0][toolow] + interp[toohigh] = constants[igrp, 1][toohigh] + else: # 'nan' + interp[toolow] = np.NaN + interp[toohigh] = np.NaN + return interp diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index 911259e21..dcbc5f822 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -15,6 +15,7 @@ from xclim.core.utils import ensure_chunk_size from .base import Grouper, parse_group +from .nbutils import _extrapolate_on_quantiles MULTIPLICATIVE = "*" ADDITIVE = "+" @@ -402,7 +403,6 @@ def _interp_on_quantiles_1D(newx, oldx, oldy, method, extrap): return newx if extrap == "constant": - # This is not needed with nearest, as it is the default behaviour fill_value = (oldy[0], oldy[-1]) else: # extrap == 'nan' fill_value = np.NaN @@ -438,21 +438,9 @@ def _interp_on_quantiles_2D(newx, newg, oldx, oldy, oldg, method, extrap): # no (newx[~mask_new], newg[~mask_new]), method=method, ) - - def first_and_last_nonnull(vector): - return vector[np.where(~np.isnan(vector))[0][np.array([0, -1])]] - - if (method == "nearest" and extrap == "nan") or extrap == "constant": - igrp = np.searchsorted(oldg[:, 0], np.round(newg).astype(int)) - toolow = newx < np.nanmin(oldx, axis=-1)[igrp] - toohigh = newx > np.nanmax(oldx, axis=-1)[igrp] - if extrap == "constant": - out[toolow] = oldy[igrp, 0][toolow] - out[toohigh] = oldy[igrp, -1][toohigh] - else: - out[toolow] = np.NaN - out[toohigh] = np.NaN - + if method == "nearest" or extrap != "nan": + # 'nan' extrapolation implicit for cubic and linear interpolation. + out = _extrapolate_on_quantiles(out, oldx, oldg, oldy, newx, newg, extrap) return out diff --git a/xclim/testing/tests/test_sdba/test_utils.py b/xclim/testing/tests/test_sdba/test_utils.py index d94a80dbc..73aa0ece7 100644 --- a/xclim/testing/tests/test_sdba/test_utils.py +++ b/xclim/testing/tests/test_sdba/test_utils.py @@ -71,57 +71,21 @@ def test_extrapolate_qm(make_qm, method, exp): assert x[0, 0] == exp[1] -@pytest.mark.parametrize("shape", [(2920,), (2920, 5, 5)]) @pytest.mark.parametrize("group", ["time", "time.month"]) -@pytest.mark.parametrize("method", ["nearest", "linear", "cubic"]) -def test_interp_on_quantiles(shape, group, method): - group = Grouper(group) - raw = np.random.random_sample(shape) # [0, 1] - t = pd.date_range("2000-01-01", periods=shape[0], freq="D") - # obs : [9, 11] - obs = xr.DataArray( - raw * 2 + 9, dims=("time", "lat", "lon")[: len(shape)], coords={"time": t} - ) - # sim [9, 11.4] (x1.2 + 0.2) - sim = xr.DataArray( - raw * 2.4 + 9, dims=("time", "lat", "lon")[: len(shape)], coords={"time": t} - ) - # fut [9.02, 11.38] (x1.18 + 0.2) In order to have every point of fut inside the range of sim - fut_raw = raw * 2.36 + 9.02 - fut_raw[ - np.array([100, 300, 500, 700]) - ] = 1000 # Points outside the sim range will be NaN - fut = xr.DataArray( - fut_raw, dims=("time", "lat", "lon")[: len(shape)], coords={"time": t} - ) - - q = np.linspace(0, 1, 11) - xq = group.apply("quantile", sim, q=q).rename(quantile="quantiles") - yq = group.apply("quantile", obs, q=q).rename(quantile="quantiles") - - fut_corr = u.interp_on_quantiles(fut, xq, yq, group=group, method=method).transpose( - *("time", "lat", "lon")[: len(shape)] - ) - - rtol = 0.02 if method == "nearest" else 0.002 - np.testing.assert_allclose( - fut_corr.values, obs.where(fut != 1000).values, rtol=rtol - ) - xr.testing.assert_equal(fut_corr.isnull(), fut == 1000) - - -@pytest.mark.parametrize("group", ["time", "time.month"]) -@pytest.mark.parametrize("method", ["nearest", "linear", "cubic"]) -def test_interp_on_quantiles_nans(group, method): - quantiles = np.linspace(0, 1, num=13) +@pytest.mark.parametrize( + "interp,expi", [("nearest", 2.9), ("linear", 2.95), ("cubic", 2.95)] +) +@pytest.mark.parametrize("extrap,expe", [("constant", 4.4), ("nan", np.NaN)]) +def test_interp_on_quantiles_constant(group, interp, expi, extrap, expe): + quantiles = np.linspace(0, 1, num=25) xq = xr.DataArray( - np.concatenate(([0], np.linspace(205, 215, num=11), [1000])), + np.linspace(205, 229, num=25), dims=("quantiles",), coords={"quantiles": quantiles}, ) yq = xr.DataArray( - np.concatenate(([2.0], np.linspace(2, 4, num=11), [4])), + np.linspace(2, 4.4, num=25), dims=("quantiles",), coords={"quantiles": quantiles}, ) @@ -131,25 +95,39 @@ def test_interp_on_quantiles_nans(group, method): yq = yq.expand_dims(month=np.arange(12) + 1) newx = xr.DataArray( - np.linspace(240, 200, num=30), + np.linspace(240, 200, num=41) - 0.5, dims=("time",), - coords={"time": xr.cftime_range("1900-03-01", freq="D", periods=30)}, + coords={"time": xr.cftime_range("1900-03-01", freq="D", periods=41)}, ) newx = newx.where(newx > 201) # Put some NaNs in newx - out = u.interp_on_quantiles(newx, xq, yq, group=group, method=method) + xq = xq.expand_dims(lat=[1, 2, 3]) + yq = yq.expand_dims(lat=[1, 2, 3]) + newx = newx.expand_dims(lat=[1, 2, 3]) - if method != "cubic": - assert out[0] == 4 - assert out[-1].isnull().item() + out = u.interp_on_quantiles( + newx, xq, yq, group=group, method=interp, extrapolation=extrap + ) - xq = xq.where(xq != 214) + if np.isnan(expe): + assert out.isel(time=0).isnull().all() + else: + assert out.isel(lat=1, time=0) == expe + np.testing.assert_allclose(out.isel(time=25), expi) + assert out.isel(time=-1).isnull().all() + + xq = xq.where(xq != 220) yq = yq.where(yq != 3) - out = u.interp_on_quantiles(newx, xq, yq, group=group, method=method) + out = u.interp_on_quantiles( + newx, xq, yq, group=group, method=interp, extrapolation=extrap + ) - if method != "cubic": - assert out[0] == 4 - assert out[-1].isnull().item() + if np.isnan(expe): + assert out.isel(time=0).isnull().all() + else: + assert out.isel(lat=1, time=0) == expe + np.testing.assert_allclose(out.isel(time=25), expi) + assert out.isel(time=-1).isnull().all() def test_rank(): diff --git a/xclim/testing/tests/test_utils.py b/xclim/testing/tests/test_utils.py index 564d55ff4..74bb35d26 100644 --- a/xclim/testing/tests/test_utils.py +++ b/xclim/testing/tests/test_utils.py @@ -6,7 +6,6 @@ import numpy as np import xarray as xr -from xclim.core.indicator import Daily from xclim.core.utils import ( ensure_chunk_size, nan_calc_percentiles, From 20971eaee7effa7b54f6fcef550fada6d88b7446 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 2 Dec 2021 17:05:52 -0500 Subject: [PATCH 11/13] Fix and tests --- xclim/sdba/_adjustment.py | 33 +++++++++++----------- xclim/sdba/utils.py | 25 ++++++++-------- xclim/testing/tests/test_sdba/test_base.py | 4 +-- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/xclim/sdba/_adjustment.py b/xclim/sdba/_adjustment.py index 10129b182..64f248a3e 100644 --- a/xclim/sdba/_adjustment.py +++ b/xclim/sdba/_adjustment.py @@ -70,13 +70,14 @@ def qm_adjust(ds, *, group, interp, extrapolation, kind) -> xr.Dataset: hist_q : Quantiles over the training data sim : Data to adjust. """ - af, hist_q = u.extrapolate_qm( - ds.af, + af = u.interp_on_quantiles( + ds.sim, ds.hist_q, - method=extrapolation, - abs_bounds=(ds.sim.min().item() - 1, ds.sim.max().item() + 1), + ds.af, + group=group, + method=interp, + extrapolation=extrapolation, ) - af = u.interp_on_quantiles(ds.sim, hist_q, af, group=group, method=interp) scen = u.apply_correction(ds.sim, af, kind).rename("scen") return scen.to_dataset() @@ -130,12 +131,15 @@ def qdm_adjust(ds, *, group, interp, extrapolation, kind) -> xr.Dataset: hist_q : Quantiles over the training data sim : Data to adjust. """ - af, _ = u.extrapolate_qm(ds.af, ds.hist_q, method=extrapolation) - sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True) - sel = {"quantiles": sim_q} - af = u.broadcast(af, ds.sim, group=group, interp=interp, sel=sel) - + af = u.interp_on_quantiles( + sim_q, + ds.quantiles, + ds.af, + group=group, + method=interp, + extrapolation=extrapolation, + ) scen = u.apply_correction(ds.sim, af, kind) return xr.Dataset(dict(scen=scen, sim_q=sim_q)) @@ -392,13 +396,10 @@ def extremes_adjust( vectorize=True, ) - # Extrapolate adjustment factors - af, px_hist = u.extrapolate_qm( - ds.af, ds.px_hist, method=extrapolation, abs_bounds=(0, 1) - ) - # Find factors by interpolating from hist probs to fut probs. apply them. - af = u.interp_on_quantiles(px_fut, px_hist, af, method=interp) + af = u.interp_on_quantiles( + px_fut, ds.px_hist, ds.af, method=interp, extrapolation=extrapolation + ) scen = u.apply_correction(ds.sim, af, "*") # Smooth transition function between sim and scen diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index dcbc5f822..ff906e358 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -201,7 +201,7 @@ def broadcast( sel = {} if group.prop != "group" and group.prop not in sel: - sel.update({group.prop: group.get_index(x, interp=interp)}) + sel.update({group.prop: group.get_index(x, interp=interp != "nearest")}) if sel: # Extract the correct mean factor for each time step. @@ -395,21 +395,20 @@ def add_endpoints( def _interp_on_quantiles_1D(newx, oldx, oldy, method, extrap): mask_new = np.isnan(newx) - if np.all(mask_new): + mask_old = np.isnan(oldy) | np.isnan(oldx) + out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") + if np.all(mask_new) or np.all(mask_old): warn( "All-NaN slice encountered in interp_on_quantiles", category=RuntimeWarning, ) - return newx + return out if extrap == "constant": fill_value = (oldy[0], oldy[-1]) else: # extrap == 'nan' fill_value = np.NaN - mask_old = np.isnan(oldy) | np.isnan(oldx) - - out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") out[~mask_new] = interp1d( oldx[~mask_old], oldy[~mask_old], @@ -422,16 +421,15 @@ def _interp_on_quantiles_1D(newx, oldx, oldy, method, extrap): def _interp_on_quantiles_2D(newx, newg, oldx, oldy, oldg, method, extrap): # noqa mask_new = np.isnan(newx) | np.isnan(newg) - if np.all(mask_new): + mask_old = np.isnan(oldy) | np.isnan(oldx) | np.isnan(oldg) + out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") + if np.all(mask_new) or np.all(mask_old): warn( "All-NaN slice encountered in interp_on_quantiles", category=RuntimeWarning, ) - return newx - - mask_old = np.isnan(oldy) | np.isnan(oldx) | np.isnan(oldg) + return out - out = np.full_like(newx, np.NaN, dtype=f"float{oldy.dtype.itemsize * 8}") out[~mask_new] = griddata( (oldx[~mask_old], oldg[~mask_old]), oldy[~mask_old], @@ -494,9 +492,10 @@ def interp_on_quantiles( if prop == "group": if "group" in xq.dims: xq = xq.squeeze("group", drop=True) + if "group" in yq.dims: yq = yq.squeeze("group", drop=True) - return xr.apply_ufunc( + out = xr.apply_ufunc( _interp_on_quantiles_1D, newx, xq, @@ -508,6 +507,8 @@ def interp_on_quantiles( dask="parallelized", output_dtypes=[yq.dtype], ) + return out + # else: if prop not in xq.dims: xq = xq.expand_dims({prop: group.get_coordinate()}) diff --git a/xclim/testing/tests/test_sdba/test_base.py b/xclim/testing/tests/test_sdba/test_base.py index 3fa1cbff7..ec52956df 100644 --- a/xclim/testing/tests/test_sdba/test_base.py +++ b/xclim/testing/tests/test_sdba/test_base.py @@ -51,8 +51,8 @@ def test_grouper_group(tas_series, group, window, nvals): ) def test_grouper_get_index(tas_series, group, interp, val90): tas = tas_series(np.ones(366), start="2000-01-01") - grouper = Grouper(group, interp=interp) - indx = grouper.get_index(tas) + grouper = Grouper(group) + indx = grouper.get_index(tas, interp=interp) # 90 is March 31st assert indx[90] == val90 From de3d498408a35db8e46cbdd7e6582aaf001c05c5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Thu, 2 Dec 2021 17:08:00 -0500 Subject: [PATCH 12/13] upd hist --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index 06b24ad12..028b85786 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -35,6 +35,7 @@ Internal changes * French translation metadata fields are now cleaner and much more internally consistent, and many empty metadata fields (e.g. ``comment_fr``) have been removed. (:pull:`930`, :issue:`929`). * Adjustments to the `tox` builds so that slow tests are now run alongside standard tests (for more accurate coverage reporting). (:pull:`938`) * Use ``xarray.apply_ufunc`` to vectorize statistical functions. (:pull:`943`) +* Refactor of ``xclim.sdba.utils.interp_on_quantiles`` so that it now handles the extrapolation directly and to better handle missing values. (:pull:`941`). Bug fixes ^^^^^^^^^ From 68dd5c222adbdd6ab9f657f65e64aa1483f90891 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 3 Dec 2021 14:55:25 -0500 Subject: [PATCH 13/13] Add deprecation warning --- HISTORY.rst | 1 + xclim/sdba/utils.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 028b85786..37e279ba3 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -25,6 +25,7 @@ New features and enhancements Breaking changes ^^^^^^^^^^^^^^^^ * Following version 1.9 of the CF Conventions, published in September 2021, the calendar name "gregorian" is deprecated. ``core.calendar.get_calendar`` will return "standard", even if the underlying cftime objects still use "gregorian" (cftime <= 1.5.1). (:pull:`935`). +* ``sdba.utils.extrapolate_qm`` is now deprecated and will be removed in version 0.33. (:pull:`941`). Internal changes ^^^^^^^^^^^^^^^^ diff --git a/xclim/sdba/utils.py b/xclim/sdba/utils.py index ff906e358..6e801f37c 100644 --- a/xclim/sdba/utils.py +++ b/xclim/sdba/utils.py @@ -334,6 +334,13 @@ def extrapolate_qm( The adjustment factor above and below the computed values are equal to the last and first values respectively. """ + warn( + ( + "`extrapolate_qm` is deprecated and will be removed in xclim 0.33. " + "Extrapolation is now handled directly in `interp_on_quantiles`." + ), + DeprecationWarning, + ) # constant_iqr # Same as `constant`, but values are set to NaN if farther than one interquartile range from the min and max. q_l, q_r = [0], [1]