Ouranosinc · aulemahal · Dec 3, 2021 · Nov 29, 2021 · Nov 29, 2021 · Nov 29, 2021
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -25,6 +25,7 @@ New features and enhancements
 Breaking changes
 ^^^^^^^^^^^^^^^^
 * Following version 1.9 of the CF Conventions, published in September 2021, the calendar name "gregorian" is deprecated. ``core.calendar.get_calendar`` will return "standard", even if the underlying cftime objects still use "gregorian" (cftime <= 1.5.1). (:pull:`935`).
+* ``sdba.utils.extrapolate_qm`` is now deprecated and will be removed in version 0.33. (:pull:`941`).
 
 Internal changes
 ^^^^^^^^^^^^^^^^
@@ -35,12 +36,14 @@ Internal changes
 * French translation metadata fields are now cleaner and much more internally consistent, and many empty metadata fields (e.g. ``comment_fr``) have been removed. (:pull:`930`, :issue:`929`).
 * Adjustments to the `tox` builds so that slow tests are now run alongside standard tests (for more accurate coverage reporting). (:pull:`938`)
 * Use ``xarray.apply_ufunc`` to vectorize statistical functions. (:pull:`943`)
+* Refactor of ``xclim.sdba.utils.interp_on_quantiles`` so that it now handles the extrapolation directly and to better handle missing values. (:pull:`941`).
 
 Bug fixes
 ^^^^^^^^^
 * Fix bugs in the `cf_attrs` and/or `abstract` of `continuous_snow_cover_end` and `continuous_snow_cover_start`. (:pull:`908`).
 * Remove unnecessary `keep_attrs` from `resample` call which would raise an error in futur Xarray version. (:pull:`937`).
-* Fixed a bug in the regex that parses usernames in the history. (:pull:`945`)
+* Fixed a bug in the regex that parses usernames in the history. (:pull:`945`).
+* Skip all missing values in ``xclim.sdba.utils.interp_on_quantiles``, drop them from both the old and new coordinates, as well as from the old values. (:pull:`941`).
 
 0.31.0 (2021-11-05)
 -------------------

diff --git a/xclim/sdba/_adjustment.py b/xclim/sdba/_adjustment.py
@@ -70,13 +70,14 @@ def qm_adjust(ds, *, group, interp, extrapolation, kind) -> xr.Dataset:
       hist_q : Quantiles over the training data
       sim : Data to adjust.
     """
-    af, hist_q = u.extrapolate_qm(
-        ds.af,
+    af = u.interp_on_quantiles(
+        ds.sim,
         ds.hist_q,
-        method=extrapolation,
-        abs_bounds=(ds.sim.min().item() - 1, ds.sim.max().item() + 1),
+        ds.af,
+        group=group,
+        method=interp,
+        extrapolation=extrapolation,
     )
-    af = u.interp_on_quantiles(ds.sim, hist_q, af, group=group, method=interp)
 
     scen = u.apply_correction(ds.sim, af, kind).rename("scen")
     return scen.to_dataset()
@@ -130,12 +131,15 @@ def qdm_adjust(ds, *, group, interp, extrapolation, kind) -> xr.Dataset:
       hist_q : Quantiles over the training data
       sim : Data to adjust.
     """
-    af, _ = u.extrapolate_qm(ds.af, ds.hist_q, method=extrapolation)
-
     sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True)
-    sel = {"quantiles": sim_q}
-    af = u.broadcast(af, ds.sim, group=group, interp=interp, sel=sel)
-
+    af = u.interp_on_quantiles(
+        sim_q,
+        ds.quantiles,
+        ds.af,
+        group=group,
+        method=interp,
+        extrapolation=extrapolation,
+    )
     scen = u.apply_correction(ds.sim, af, kind)
     return xr.Dataset(dict(scen=scen, sim_q=sim_q))
 
@@ -392,13 +396,10 @@ def extremes_adjust(
         vectorize=True,
     )
 
-    # Extrapolate adjustment factors
-    af, px_hist = u.extrapolate_qm(
-        ds.af, ds.px_hist, method=extrapolation, abs_bounds=(0, 1)
-    )
-
     # Find factors by interpolating from hist probs to fut probs. apply them.
-    af = u.interp_on_quantiles(px_fut, px_hist, af, method=interp)
+    af = u.interp_on_quantiles(
+        px_fut, ds.px_hist, ds.af, method=interp, extrapolation=extrapolation
+    )
     scen = u.apply_correction(ds.sim, af, "*")
 
     # Smooth transition function between sim and scen

diff --git a/xclim/sdba/base.py b/xclim/sdba/base.py
@@ -105,7 +105,6 @@ def __init__(
         group: str,
         window: int = 1,
         add_dims: Optional[Union[Sequence[str], Set[str]]] = None,
-        interp: Union[bool, str] = False,
     ):
         """Create the Grouper object.
 
@@ -122,20 +121,12 @@ def __init__(
           Additional dimensions that should be reduced in grouping operations. This behaviour is also controlled
           by the `main_only` parameter of the `apply` method. If any of these dimensions are absent from the dataarrays,
           they will be omitted.
-        interp : Union[bool, str]
-          Whether to return an interpolatable index in the `get_index` method. Only effective for `month` grouping.
-          Interpolation method names are accepted for convenience, "nearest" is translated to False, all other names
-          are translated to True.
-          This modifies the default, but `get_index` also accepts an `interp` argument overriding the one defined here..
         """
         if "." in group:
             dim, prop = group.split(".")
         else:
             dim, prop = group, "group"
 
-        if isinstance(interp, str):
-            interp = interp != "nearest"
-
         if isinstance(add_dims, str):
             add_dims = [add_dims]
 
@@ -146,7 +137,6 @@ def __init__(
             prop=prop,
             name=group,
             window=window,
-            interp=interp,
         )
 
     @classmethod
@@ -155,7 +145,6 @@ def from_kwargs(cls, **kwargs):
             group=kwargs.pop("group"),
             window=kwargs.pop("window", 1),
             add_dims=kwargs.pop("add_dims", []),
-            interp=kwargs.get("interp", False),
         )
         return kwargs
 
@@ -239,7 +228,7 @@ def group(
     def get_index(
         self,
         da: Union[xr.DataArray, xr.Dataset],
-        interp: Optional[Union[bool, str]] = None,
+        interp: Optional[bool] = None,
     ):
         """Return the group index of each element along the main dimension.
 
@@ -248,9 +237,9 @@ def get_index(
         da : Union[xr.DataArray, xr.Dataset]
           The input array/dataset for which the group index is returned.
           It must have Grouper.dim as a coordinate.
-        interp : Union[bool, str]
-          Argument `interp` defaults to `self.interp`. If True, the returned index can be
-          used for interpolation. For month grouping, integer values represent the middle of the month, all other
+        interp : bool, optional
+          If True, the returned index can be used for interpolation. Only value for month
+          grouping, where integer values represent the middle of the month, all other
           days are linearly interpolated in between.
 
         Returns
@@ -275,21 +264,8 @@ def get_index(
                 f"Index {self.name} is not of type int (rather {i.dtype}), but {self.__class__.__name__} requires integer indexes."
             )
 
-        interp = (
-            (interp or self.interp)
-            if not isinstance(interp, str)
-            else interp != "nearest"
-        )
-        if interp:
-            if self.dim == "time":
-                if self.prop == "month":
-                    i = ind.month - 0.5 + ind.day / ind.days_in_month
-                elif self.prop == "dayofyear":
-                    i = ind.dayofyear
-                else:
-                    raise NotImplementedError
-            else:
-                raise NotImplementedError
+        if interp and self.dim == "time" and self.prop == "month":
+            i = ind.month - 0.5 + ind.day / ind.days_in_month
 
         xi = xr.DataArray(
             i,

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
@@ -3,7 +3,7 @@
 ---------------------------
 """
 import numpy as np
-from numba import boolean, float32, float64, guvectorize, njit
+from numba import boolean, float32, float64, guvectorize, int64, njit
 from xarray import DataArray
 from xarray.core import utils
 
@@ -172,3 +172,39 @@ def _escore(tgt, sim, out):
 
     w = n1 * n2 / (n1 + n2)
     out[0] = w * (sXY + sXY - sXX - sYY) / 2
+
+
+@njit
+def _first_and_last_nonnull(arr):
+    """For each row of arr, get the first and last non NaN elements."""
+    out = np.empty((arr.shape[0], 2))
+    for i in range(arr.shape[0]):
+        idxs = np.where(~np.isnan(arr[i]))[0]
+        if idxs.size > 0:
+            out[i] = arr[i][idxs[np.array([0, -1])]]
+        else:
+            out[i] = np.array([np.NaN, np.NaN])
+    return out
+
+
+@njit
+def _extrapolate_on_quantiles(interp, oldx, oldg, oldy, newx, newg, method="constant"):
+    """Apply extrapolation to the output of interpolation on quantiles with a given
+    grouping. Arguments are the same as _interp_on_quantiles_2D.
+
+    "constant" extrapolation is done independently for each group.
+    """
+    igrp = np.empty_like(newg)
+    np.around(newg, 0, igrp)
+    igrp = igrp.astype(int64)
+    bnds = _first_and_last_nonnull(oldx)
+    toolow = newx < bnds[:, 0][igrp]
+    toohigh = newx > bnds[:, 1][igrp]
+    if method == "constant":
+        constants = _first_and_last_nonnull(oldy)
+        interp[toolow] = constants[igrp, 0][toolow]
+        interp[toohigh] = constants[igrp, 1][toohigh]
+    else:  # 'nan'
+        interp[toolow] = np.NaN
+        interp[toohigh] = np.NaN
+    return interp