Skip to content
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ New features and enhancements
* Added an optimized pathway for ``xclim.indices.run_length`` functions when ``window=1``. (:pull:`911`, :issue:`910`).
* The data input frequency expected by ``Indicator``s is now in the ``src_freq`` attribute and is thus controllable by subclassing existing indicators. (:issue:`898`, :pull:`927`).
* New ``**indexer`` keyword args added to many indicators; they accept the same arguments as ``xclim.indices.generic.select_time``, which has been improved. Unless otherwise specified, the time selection is done before any computation. (:pull:`934`, :issue:`899`).
* Rewrite of ``xclim.sdba.ExtremeValues``, now fixed with a correct algorithm. It has not been tested extensively and should be considered experimental. (:pull:`914`, :issue:`789`, :issue:`790`).

Breaking changes
^^^^^^^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.31.4-beta
current_version = 0.31.5-beta
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+).(?P<patch>\d+)(\-(?P<release>[a-z]+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
AUTHOR = "Travis Logan"
AUTHOR_EMAIL = "logan.travis@ouranos.ca"
REQUIRES_PYTHON = ">=3.7.0"
VERSION = "0.31.4-beta"
VERSION = "0.31.5-beta"
LICENSE = "Apache Software License 2.0"

with open("README.rst") as readme_file:
Expand Down
2 changes: 1 addition & 1 deletion xclim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

__author__ = """Travis Logan"""
__email__ = "logan.travis@ouranos.ca"
__version__ = "0.31.4-beta"
__version__ = "0.31.5-beta"


# Load official locales
Expand Down
120 changes: 120 additions & 0 deletions xclim/sdba/_adjustment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import numpy as np
import xarray as xr

from xclim.indices.stats import _fitfunc_1d

from . import nbutils as nbu
from . import utils as u
from .base import Grouper, map_blocks, map_groups
Expand Down Expand Up @@ -289,3 +291,121 @@ def npdf_transform(ds: xr.Dataset, **kwargs) -> xr.Dataset:
"escores": escores,
}
)


def _fit_on_cluster(data, thresh, dist, cluster_thresh):
    """Extract clusters from 1D data and fit ``dist`` on the cluster maximums."""
    cluster_maxs = u.get_clusters_1d(data, thresh, cluster_thresh)[3]
    fitted = list(
        _fitfunc_1d(cluster_maxs - thresh, dist=dist, floc=0, nparams=3, method="ML")
    )
    # The location was forced to 0 for the fit; restore the threshold in its place.
    fitted[-2] = thresh
    return fitted


def _extremes_train_1d(ref, hist, ref_params, *, q_thresh, cluster_thresh, dist, N):
    """Train for method ExtremeValues, only for 1D input along time.

    Parameters
    ----------
    ref, hist : np.ndarray
        1D reference and historical series.
    ref_params : array-like
        Pre-fitted distribution parameters for `ref`. If all-NaN, the
        distribution is fitted here on `ref`'s cluster maximums.
    q_thresh : float
        Quantile (within values >= `cluster_thresh`) defining the extremes threshold.
    cluster_thresh : float
        Absolute threshold above which clusters are extracted.
    dist : scipy.stats distribution
        Distribution fitted on cluster maximums (a generalized Pareto).
    N : int
        Output length; shorter results are NaN-padded to this size.

    Returns
    -------
    px_hist, af : np.ndarray
        CDF probabilities of `hist`'s extremes and the corresponding
        adjustment factors, sorted by probability and NaN-padded to length N.
    thresh : float
        The computed extremes threshold.
    """
    # Threshold: mean of the q_thresh quantiles of ref and hist,
    # each taken over values at or above cluster_thresh.
    thresh = (
        np.quantile(ref[ref >= cluster_thresh], q_thresh)
        + np.quantile(hist[hist >= cluster_thresh], q_thresh)
    ) / 2

    # Fit genpareto on cluster maximums of ref (only if not already provided) and hist.
    if np.isnan(ref_params).all():
        ref_params = _fit_on_cluster(ref, thresh, dist, cluster_thresh)

    hist_params = _fit_on_cluster(hist, thresh, dist, cluster_thresh)

    # Probabilities of the extremes according to the fitted distributions.
    Px_ref = dist.cdf(ref[ref >= thresh], *ref_params)
    hist = hist[hist >= thresh]
    Px_hist = dist.cdf(hist, *hist_params)

    # Restrict to the probability range common to both series.
    Pmax = min(Px_ref.max(), Px_hist.max())
    Pmin = max(Px_ref.min(), Px_hist.min())
    Pcommon = (Px_hist <= Pmax) & (Px_hist >= Pmin)
    Px_hist = Px_hist[Pcommon]

    # Values hist's extremes would have had under ref's distribution.
    hist_in_ref = dist.ppf(Px_hist, *ref_params)

    # Adjustment factors, unsorted.
    af = hist_in_ref / hist[Pcommon]
    # Sort by probability and pad to N values. `np.nan` is used instead of the
    # removed-in-NumPy-2.0 `np.NaN` alias; downstream interpolation handles NaNs.
    order = np.argsort(Px_hist)
    px_hist = np.pad(Px_hist[order], ((0, N - af.size),), constant_values=np.nan)
    af = np.pad(af[order], ((0, N - af.size),), constant_values=np.nan)

    return px_hist, af, thresh


@map_blocks(
    reduces=["time"], px_hist=["quantiles"], af=["quantiles"], thresh=[Grouper.PROP]
)
def extremes_train(ds, *, group, q_thresh, cluster_thresh, dist, quantiles):
    """Train part of the ExtremeValues adjustment.

    Vectorizes :py:func:`_extremes_train_1d` over all non-time dimensions of
    ``ds.ref`` / ``ds.hist`` and packages the outputs for ``map_blocks``.
    """
    px_hist, af, thresh = xr.apply_ufunc(
        _extremes_train_1d,
        ds.ref,
        ds.hist,
        # Pass NaN so _extremes_train_1d fits ref itself when no params are given.
        ds.ref_params or np.nan,
        input_core_dims=[("time",), ("time",), ()],
        output_core_dims=[("quantiles",), ("quantiles",), ()],
        vectorize=True,
        kwargs={
            "q_thresh": q_thresh,
            "cluster_thresh": cluster_thresh,
            "dist": dist,
            "N": len(quantiles),
        },
    )
    # Outputs of map_blocks must have dimensions.
    if not isinstance(thresh, xr.DataArray):
        thresh = xr.DataArray(thresh)
    thresh = thresh.expand_dims(group=[1])
    return xr.Dataset(
        {"px_hist": px_hist, "af": af, "thresh": thresh},
        coords={"quantiles": quantiles},
    )


def _fit_cluster_and_cdf(data, thresh, dist, cluster_thresh):
    """Fit ``dist`` on the cluster maximums of 1D data, then immediately return the CDF of the data."""
    params = _fit_on_cluster(data, thresh, dist, cluster_thresh)
    return dist.cdf(data, *params)


@map_blocks(reduces=["quantiles", Grouper.PROP], scen=[])
def extremes_adjust(
    ds, *, group, frac, power, dist, interp, extrapolation, cluster_thresh
):
    """Adjust part of the ExtremeValues method.

    Corrects the extremes of ``sim`` and blends the result smoothly into the
    pre-existing ``scen``.
    """
    # Probabilities of sim's extremes under a dist fitted on its own clusters.
    sim_probs = xr.apply_ufunc(
        _fit_cluster_and_cdf,
        ds.sim,
        ds.thresh,
        input_core_dims=[["time"], []],
        output_core_dims=[["time"]],
        kwargs={"dist": dist, "cluster_thresh": cluster_thresh},
        vectorize=True,
    )

    # Extrapolate the adjustment factors beyond the trained probability range.
    factors, hist_probs = u.extrapolate_qm(
        ds.af, ds.px_hist, method=extrapolation, abs_bounds=(0, 1)
    )

    # Interpolate factors from hist's probabilities onto sim's, then apply them.
    factors = u.interp_on_quantiles(sim_probs, hist_probs, factors, method=interp)
    adjusted = u.apply_correction(ds.sim, factors, "*")

    # Smooth transition weight between the corrected extremes and scen,
    # ramping from 0 at the threshold and clipped to [0, 1].
    span = ds.sim.max("time") - ds.thresh
    weight = (((ds.sim - ds.thresh) / span) / frac) ** power
    weight = weight.clip(0, 1)

    blended = (weight * adjusted) + ((1 - weight) * ds.scen)
    return blended.rename("scen").squeeze("group", drop=True).to_dataset()
Loading