From b206ef20f61f03dcbc991c13b129de4c4835047b Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 17 Nov 2021 11:51:12 -0500 Subject: [PATCH 1/4] Resampling indicator --- xclim/core/formatting.py | 8 +- xclim/core/indicator.py | 255 +++++++++++++------------ xclim/data/anuclim.yml | 23 ++- xclim/data/schema.yml | 1 + xclim/indicators/atmos/_conversion.py | 17 +- xclim/indicators/atmos/_precip.py | 16 +- xclim/indicators/atmos/_temperature.py | 23 +-- xclim/indicators/land/_streamflow.py | 17 +- xclim/indices/_anuclim.py | 23 ++- xclim/testing/tests/test_indicators.py | 39 +++- 10 files changed, 245 insertions(+), 177 deletions(-) diff --git a/xclim/core/formatting.py b/xclim/core/formatting.py index b1ce65ec3..3e3977c26 100644 --- a/xclim/core/formatting.py +++ b/xclim/core/formatting.py @@ -541,8 +541,10 @@ def generate_indicator_docstring(ind): """ header = f"{ind.title} (realm: {ind.realm})\n\n{ind.abstract}\n" - special = f'This indicator will check for missing values according to the method "{ind.missing}".\n' + special = "" + if hasattr(ind, "missing"): # Only ResamplingIndicators + special += f'This indicator will check for missing values according to the method "{ind.missing}".\n' if hasattr(ind.compute, "__module__"): special += f"Based on indice :py:func:`~{ind.compute.__module__}.{ind.compute.__name__}`.\n" if ind.injected_parameters: @@ -554,7 +556,9 @@ def generate_indicator_docstring(ind): if ind.keywords: special += f"Keywords : {ind.keywords}.\n" - parameters = _gen_parameters_section(ind.parameters, ind.allowed_periods) + parameters = _gen_parameters_section( + ind.parameters, getattr(ind, "allowed_periods", None) + ) returns = _gen_returns_section(ind.cf_attrs) diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 3c1dc7faa..ffd96ae9c 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -92,6 +92,7 @@ from collections import OrderedDict, defaultdict from copy import deepcopy from dataclasses import asdict, dataclass +from functools import reduce from inspect import Parameter as _Parameter from inspect import Signature from inspect import _empty as _empty_default @@ -293,19 +294,10 @@ class Indicator(IndicatorRegistrar): notes: str Notes regarding computing function, for example the mathematical formulation. Parsed from `compute` docstring if None (form the "Notes" section). - missing: {any, wmo, pct, at_least_n, skip, from_context} - The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If - None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". - missing_options : dict, None - Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. - freq: str, sequence of strings, optional + src_freq: str, sequence of strings, optional The expected frequency of the input data. Can be a list for multiple frequencies, or None if irrelevant. context: str The `pint` unit context, for example use 'hydro' to allow conversion from kg m-2 s-1 to mm/day. - allowed_periods : Sequence[str], optional - A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be - computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the - indicator doesn't take a `freq` argument. Notes ----- @@ -327,18 +319,15 @@ class Indicator(IndicatorRegistrar): # metadata fields that are formatted as free text (first letter capitalized) _text_fields = ["long_name", "description", "comment"] # Class attributes that are function (so we know which to convert to static methods) - _funcs = ["compute", "cfcheck", "datacheck"] + _funcs = ["compute"] # Mapping from name in the compute function to official (CMIP6) variable name _variable_mapping = {} # Will become the class's name identifier = None - missing = "from_context" - missing_options = None context = "none" - freq = None - allowed_periods = None + src_freq = None # Global metadata (must be strings, not attributed to the output) realm = None @@ -424,11 +413,6 @@ def __new__(cls, **kwds): # All updates done. kwds["_all_parameters"] = parameters - # By default skip missing values handling if there is no resampling. - # Dont only check if freq is in current parameters but also if it was injected earlier. - if "freq" not in parameters: - kwds["missing"] = "skip" - # Parse kwds to organize `cf_attrs` # And before converting callables to staticmethods kwds["cf_attrs"] = cls._parse_output_attrs(kwds, identifier) @@ -562,8 +546,8 @@ def _parse_var_mapping(cls, variable_mapping, parameters, kwds): new_variable_mapping.update(variable_mapping) kwds["_variable_mapping"] = new_variable_mapping - @staticmethod - def _ensure_correct_parameters(parameters): + @classmethod + def _ensure_correct_parameters(cls, parameters): """Ensure the parameters are correctly set and ordered. Sets the correct variable default to be sure. @@ -709,16 +693,6 @@ def __init__(self, **kwds): """Run checks and organizes the metadata.""" # keywords of kwds that are class attributes have already been set in __new__ self._check_identifier(self.identifier) - if self.missing == "from_context" and self.missing_options is not None: - raise ValueError( - "Cannot set `missing_options` with `missing` method being from context." - ) - - # Validate hard-coded missing options - kls = MISSING_METHODS[self.missing] - self._missing = kls.execute - if self.missing_options: - kls.validate(**self.missing_options) # Validation is done : register the instance. super().__init__() @@ -779,6 +753,23 @@ def __call__(self, *args, **kwds): # indexer: If present, the "indexer" kwargs <- this is needed by _update_attrs and _mask das, params, indexer = self._parse_variables_from_call(args, kwds) + das, params, indexer = self._preprocess_and_checks(das, params, indexer) + + # Get correct variable names for the compute function. + inv_var_map = dict(map(reversed, self._variable_mapping.items())) + compute_das = {inv_var_map.get(nm, nm): das[nm] for nm in das} + # Compute the indicator values, ignoring NaNs and missing values. + outs = self.compute(**compute_das, **params, **indexer) + + if isinstance(outs, DataArray): + outs = [outs] + + if len(outs) != self.n_outs: + raise ValueError( + f"Indicator {self.identifier} was wrongly defined. Expected " + f"{self.n_outs} outputs, got {len(outs)}." + ) + # Metadata attributes from templates var_id = None cf_attrs = [] @@ -796,37 +787,6 @@ def __call__(self, *args, **kwds): ) ) - # Pre-computation validation checks on DataArray arguments - self._bind_call(self.datacheck, **das) - self._bind_call(self.cfcheck, **das) - - # Check if the period is allowed: - if ( - self.allowed_periods is not None - and "freq" in params - and parse_offset(params["freq"])[1] not in self.allowed_periods - ): - raise ValueError( - f"Resampling frequency {params['freq']} is not allowed for indicator " - f"{self.identifier} (needs something equivalent to one " - f"of {self.allowed_periods})." - ) - - # Get correct variable names for the compute function. - inv_var_map = dict(map(reversed, self._variable_mapping.items())) - compute_das = {inv_var_map.get(nm, nm): das[nm] for nm in das} - # Compute the indicator values, ignoring NaNs and missing values. - outs = self.compute(**compute_das, **params, **indexer) - - if isinstance(outs, DataArray): - outs = [outs] - - if len(outs) != self.n_outs: - raise ValueError( - f"Indicator {self.identifier} was wrongly defined. Expected " - f"{self.n_outs} outputs, got {len(outs)}." - ) - # Convert to output units outs = [ convert_units_to(out, attrs.get("units", ""), self.context) @@ -839,11 +799,7 @@ def __call__(self, *args, **kwds): out.attrs.update(attrs) out.name = var_name - if self.missing != "skip": - # Mask results that do not meet criteria defined by the `missing` method. - # This means all outputs must have the same dimensions as the broadcasted inputs (excluding time) - mask = self._mask(*das.values(), indexer=indexer, **params) - outs = [out.where(~mask) for out in outs] + outs = self._postprocess(outs, das, params, indexer) # Return a single DataArray in case of single output, otherwise a tuple if self.n_outs == 1: @@ -902,6 +858,18 @@ def _assign_named_args(self, ba): f"dataset (got {name}='{ba.arguments[name]}')" ) + def _preprocess_and_checks(self, das, params, indexer): + """Actions to be done after parsing the arguments and before computing.""" + # Pre-computation validation checks on DataArray arguments + self._bind_call(self.datacheck, **das) + self._bind_call(self.cfcheck, **das) + + return das, params, indexer + + def _postprocess(self, outs, das, params, indexer): + """Actions to done after computing.""" + return outs + def _bind_call(self, func, **das): """Call function using `__call__` `DataArray` arguments. @@ -1185,29 +1153,6 @@ def _format( return out - def _default_freq(self, **indexer): - """Return default frequency.""" - if self.freq in ["D", "H"]: - return indices.generic.default_freq(**indexer) - return None - - def _mask(self, *args, indexer=None, **kwds): - """Return whether mask for output values, based on the output of the `missing` method.""" - from functools import reduce - - freq = kwds.get("freq", self._default_freq(**(indexer or {}))) - - options = self.missing_options or OPTIONS[MISSING_OPTIONS].get(self.missing, {}) - - # We flag periods according to the missing method. skip variables without a time coordinate. - src_freq = self.freq if isinstance(self.freq, str) else None - miss = ( - self._missing(da, freq, src_freq, options, indexer) - for da in args - if "time" in da.coords - ) - return reduce(np.logical_or, miss) - # The following static methods are meant to be replaced to define custom indicators. @staticmethod def compute(*args, **kwds): @@ -1217,8 +1162,7 @@ def compute(*args, **kwds): """ raise NotImplementedError - @staticmethod - def cfcheck(**das): + def cfcheck(self, **das): """Compare metadata attributes to CF-Convention standards. Default cfchecks use the specifications in `xclim.core.utils.VARIABLES`, @@ -1234,19 +1178,22 @@ def cfcheck(**das): # Silently ignore unknown variables. pass - @staticmethod - def datacheck(**das): + def datacheck(self, **das): """Verify that input data is valid. When subclassing this method, use functions decorated using `xclim.core.options.datacheck`. For example, checks could include: - * assert temporal frequency is daily * assert no precipitation is negative * assert no temperature has the same value 5 days in a row + + This base datacheck checks that the input data has a valid sampling frequency, as given in self.src_freq. """ - pass + if self.src_freq is not None: + for key, da in das.items(): + if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3: + datachecks.check_freq(da, self.src_freq, strict=True) def __getattr__(self, attr): if attr in self._cf_names: @@ -1285,47 +1232,109 @@ def injected_parameters(self): } -class Daily(Indicator): - """Indicator defined for inputs at daily frequency.""" +class ResamplingIndicator(Indicator): + """Indicator that performs a resampling computation. - freq = "D" + Compared to the base Indicator, this adds the handling of missing data, + and the check of allowed periods. - @staticmethod - def datacheck(**das): # noqa - for key, da in das.items(): - if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3: - datachecks.check_daily(da) + Parameters + ---------- + missing: {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, None + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + allowed_periods : Sequence[str], optional + A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be + computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the + indicator doesn't take a `freq` argument. + """ + missing = "from_context" + missing_options = None + allowed_periods = None -class Hourly(Indicator): - """Indicator defined for inputs at strict hourly frequency, meaning 3-hourly inputs would raise an error.""" + @classmethod + def _ensure_correct_parameters(cls, parameters): + if "freq" not in parameters: + raise ValueError( + "ResamplingIndicator require a 'freq' argument, use the base Indicator" + " class if your computation doesn't perform any resampling." + ) + return super()._ensure_correct_parameters(parameters) - freq = "H" + def __init__(self, **kwds): + if self.missing == "from_context" and self.missing_options is not None: + raise ValueError( + "Cannot set `missing_options` with `missing` method being from context." + ) - @staticmethod - def datacheck(**das): # noqa - for key, da in das.items(): - datachecks.check_freq(da, "H") + # Validate hard-coded missing options + kls = MISSING_METHODS[self.missing] + self._missing = kls.execute + if self.missing_options: + kls.validate(**self.missing_options) + super().__init__(**kwds) -class DailyWeeklyMonthly(Indicator): - """Indicator defined for inputs at daily, weekly or monthly frequencies. + def _preprocess_and_checks(self, das, params, indexer): + """Perform parent's checks and also checks if freq is allowed.""" + das, params, indexer = super()._preprocess_and_checks(das, params, indexer) - Required by ANUCLIM indicators. - """ + # Check if the period is allowed: + if ( + self.allowed_periods is not None + and parse_offset(params["freq"])[1] not in self.allowed_periods + ): + raise ValueError( + f"Resampling frequency {params['freq']} is not allowed for indicator " + f"{self.identifier} (needs something equivalent to one " + f"of {self.allowed_periods})." + ) - freq = ["D", "7D", "M"] + return das, params, indexer - @staticmethod - def datacheck(**das): # noqa - for key, da in das.items(): - if "time" in da.coords and da.time.ndim == 1 and len(da.time) > 3: - datachecks.check_freq(da, ["D", "7D", "M"], strict=True) + def _postprocess(self, outs, das, params, indexer): + """Masking of missing values.""" + outs = super()._postprocess(outs, das, params, indexer) + + if self.missing != "skip": + # Mask results that do not meet criteria defined by the `missing` method. + # This means all outputs must have the same dimensions as the broadcasted inputs (excluding time) + options = self.missing_options or OPTIONS[MISSING_OPTIONS].get( + self.missing, {} + ) + + # We flag periods according to the missing method. skip variables without a time coordinate. + src_freq = self.src_freq if isinstance(self.src_freq, str) else None + miss = ( + self._missing(da, params["freq"], src_freq, options, indexer) + for da in das.values() + if "time" in da.coords + ) + mask = reduce(np.logical_or, miss) + outs = [out.where(~mask) for out in outs] + + return outs + + +class Daily(ResamplingIndicator): + """Class for daily inputs and resampling computes.""" + + src_freq = "D" + + +class Hourly(ResamplingIndicator): + """Class for hourly inputs and resampling computes.""" + + src_freq = "H" +base_registry["Indicator"] = Indicator +base_registry["ResamplingIndicator"] = ResamplingIndicator base_registry["Hourly"] = Hourly base_registry["Daily"] = Daily -base_registry["DailyWeeklyMonthly"] = DailyWeeklyMonthly def add_iter_indicators(module): diff --git a/xclim/data/anuclim.yml b/xclim/data/anuclim.yml index 0545cac13..1a4c5f834 100644 --- a/xclim/data/anuclim.yml +++ b/xclim/data/anuclim.yml @@ -12,10 +12,11 @@ doc: | definitions and can calculate the result with daily input data. .. [ANUCLIM] https://fennerschool.anu.edu.au/files/anuclim61.pdf (ch. 6) -base: DailyWeeklyMonthly +base: ResamplingIndicator indicators: P10_MeanTempWarmestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tg_mean_warmcold_quarter cf_attrs: standard_name: air_temperature @@ -25,6 +26,7 @@ indicators: op: warmest P11_MeanTempColdestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tg_mean_warmcold_quarter cf_attrs: standard_name: air_temperature @@ -34,6 +36,7 @@ indicators: op: coldest P12_AnnualPrecip: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: prcptot cf_attrs: long_name: Annual Precipitation @@ -42,6 +45,7 @@ indicators: units: mm P13_PrecipWettestPeriod: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_period cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -51,6 +55,7 @@ indicators: op: wettest P14_PrecipDriestPeriod: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_period cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -59,12 +64,15 @@ indicators: parameters: op: driest P15_PrecipSeasonality: + allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: precip_seasonality cf_attrs: cell_methods: "time: standard_deviation" description: "The standard deviation of the precipitation estimates expressed as a percentage of the mean of those estimates." P16_PrecipWettestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -74,6 +82,7 @@ indicators: op: wettest P17_PrecipDriestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: prcptot_wetdry_quarter cf_attrs: standard_name: lwe_thickness_of_precipitation_amount @@ -82,6 +91,7 @@ indicators: parameters: op: driest P18_PrecipWarmestQuarter: + src_freq: ['D', '7D', 'M'] allowed_periods: [A] compute: prcptot_warmcold_quarter cf_attrs: @@ -91,6 +101,7 @@ indicators: parameters: op: warmest P19_PrecipColdestQuarter: + src_freq: ['D', '7D', 'M'] allowed_periods: [A] compute: prcptot_warmcold_quarter cf_attrs: @@ -101,6 +112,7 @@ indicators: op: coldest P1_AnnMeanTemp: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tg_mean cf_attrs: units: K @@ -109,6 +121,7 @@ indicators: standard_name: air_temperature P2_MeanDiurnalRange: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: daily_temperature_range cf_attrs: units: K @@ -116,17 +129,21 @@ indicators: cell_methods: "time: range" P3_Isothermality: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: isothermality cf_attrs: cell_methods: "time: range" description: "The mean diurnal range (P2) divided by the Annual Temperature Range (P7)." P4_TempSeasonality: + allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: temperature_seasonality cf_attrs: cell_methods: "time: standard_deviation" description: "The standard deviation of the mean temperatures expressed as a percentage of the mean of those temperatures. For this calculation, the mean in degrees Kelvin is used. This avoids the possibility of having to divide by zero, but it does mean that the values are usually quite small." P5_MaxTempWarmestPeriod: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tx_max cf_attrs: long_name: Max Temperature of Warmest Period @@ -136,6 +153,7 @@ indicators: cell_methods: "time: maximum" P6_MinTempColdestPeriod: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tn_min cf_attrs: long_name: Min Temperature of Coldest Period @@ -145,6 +163,7 @@ indicators: cell_methods: "time: minimum" P7_TempAnnualRange: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: extreme_temperature_range input: low_data: tasmin @@ -158,6 +177,7 @@ indicators: default: YS P8_MeanTempWettestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tg_mean_wetdry_quarter cf_attrs: standard_name: air_temperature @@ -167,6 +187,7 @@ indicators: op: wettest P9_MeanTempDriestQuarter: allowed_periods: [A] + src_freq: ['D', '7D', 'M'] compute: tg_mean_wetdry_quarter cf_attrs: standard_name: air_temperature diff --git a/xclim/data/schema.yml b/xclim/data/schema.yml index 6b4c471c3..64e7e0228 100644 --- a/xclim/data/schema.yml +++ b/xclim/data/schema.yml @@ -10,6 +10,7 @@ indicators: map(include('indicator'), key=regex('^[-\w]+$')) indicator: abstract: str(required=False) allowed_periods: list(enum('A', 'Q', 'M', 'W'), required=False) + src_freq: list(str(), required=False) base: str(required=False) compute: str(required=False) input: map(str(), key=str(), required=False) diff --git a/xclim/indicators/atmos/_conversion.py b/xclim/indicators/atmos/_conversion.py index 7421ce48a..0056da9c4 100644 --- a/xclim/indicators/atmos/_conversion.py +++ b/xclim/indicators/atmos/_conversion.py @@ -19,14 +19,13 @@ "wind_chill_index", "potential_evapotranspiration", "water_budget", + "corn_heat_units", ] class Converter(Indicator): """Class for indicators doing variable conversion (dimension-independent 1-to-1 computation).""" - missing = "skip" - humidex = Converter( identifier="humidex", @@ -238,3 +237,17 @@ class Converter(Indicator): ), compute=indices.water_budget, ) + + +corn_heat_units = Converter( + identifier="corn_heat_units", + units="", + long_name="Corn heat units (Tmin > {thresh_tasmin} and Tmax > {thresh_tasmax}).", + description="Temperature-based index used to estimate the development of corn crops. " + "Corn growth occurs when the minimum and maximum daily temperature both exceeds " + "specific thresholds : Tmin > {thresh_tasmin} and Tmax > {thresh_tasmax}.", + var_name="chu", + cell_methods="", + missing="skip", + compute=indices.corn_heat_units, +) diff --git a/xclim/indicators/atmos/_precip.py b/xclim/indicators/atmos/_precip.py index 7b9c5aa48..ee113ac25 100644 --- a/xclim/indicators/atmos/_precip.py +++ b/xclim/indicators/atmos/_precip.py @@ -4,7 +4,7 @@ from xclim import indices from xclim.core import cfchecks -from xclim.core.indicator import Daily, Hourly +from xclim.core.indicator import Daily, Hourly, Indicator from xclim.core.utils import InputKind __all__ = [ @@ -35,6 +35,13 @@ ] +class FireWeather(Indicator): + """Non resampling - precipitation related indicators.""" + + src_freq = "D" + context = "hydro" + + class Precip(Daily): """Indicator involving daily pr series.""" @@ -46,8 +53,7 @@ class PrTasx(Daily): context = "hydro" - @staticmethod - def cfcheck(pr, tas): + def cfcheck(self, pr, tas): cfchecks.cfcheck_from_name("pr", pr) cfchecks.check_valid(tas, "standard_name", "air_temperature") @@ -210,7 +216,7 @@ class HrPrecip(Hourly): parameters={"tas": {"kind": InputKind.VARIABLE}, "phase": "solid"}, ) -drought_code = Precip( +drought_code = FireWeather( identifier="dc", units="", standard_name="drought_code", @@ -220,7 +226,7 @@ class HrPrecip(Hourly): missing="skip", ) -fire_weather_indexes = Precip( +fire_weather_indexes = FireWeather( identifier="fwi", realm="atmos", var_name=["dc", "dmc", "ffmc", "isi", "bui", "fwi"], diff --git a/xclim/indicators/atmos/_temperature.py b/xclim/indicators/atmos/_temperature.py index 11d502b57..c09ea4b53 100644 --- a/xclim/indicators/atmos/_temperature.py +++ b/xclim/indicators/atmos/_temperature.py @@ -5,7 +5,7 @@ from xclim import indices from xclim.core import cfchecks -from xclim.core.indicator import Daily +from xclim.core.indicator import Daily, Indicator from xclim.core.utils import InputKind __all__ = [ @@ -74,7 +74,6 @@ "warm_spell_duration_index", "maximum_consecutive_warm_days", "fire_season", - "corn_heat_units", "huglin_index", "biologically_effective_degree_days", "latitude_temperature_index", @@ -849,11 +848,10 @@ class Temp(Daily): ) -class FireSeasonBase(Daily): - """Special Indicator class for FireSeason that accepts any tas[min/max] and optional snd.""" +class FireSeasonBase(Indicator): + """Special Indicator class for FireSeason that accepts any tas[min/max] and optional snd and is not resampling.""" - @staticmethod - def cfcheck(tas, snd=None): + def cfcheck(self, tas, snd=None): cfchecks.check_valid(tas, "standard_name", "air_temperature") cfchecks.cfcheck_from_name("snd", snd) @@ -866,19 +864,6 @@ def cfcheck(tas, snd=None): ) -corn_heat_units = Temp( - identifier="corn_heat_units", - units="", - long_name="Corn heat units (Tmin > {thresh_tasmin} and Tmax > {thresh_tasmax}).", - description="Temperature-based index used to estimate the development of corn crops. " - "Corn growth occurs when the minimum and maximum daily temperature both exceeds " - "specific thresholds : Tmin > {thresh_tasmin} and Tmax > {thresh_tasmax}.", - var_name="chu", - cell_methods="", - missing="skip", - compute=indices.corn_heat_units, -) - huglin_index = Temp( identifier="huglin_index", units="", diff --git a/xclim/indicators/land/_streamflow.py b/xclim/indicators/land/_streamflow.py index 162c7d3a3..9029b36d4 100644 --- a/xclim/indicators/land/_streamflow.py +++ b/xclim/indicators/land/_streamflow.py @@ -1,7 +1,7 @@ """Streamflow indicator definitions.""" from xclim.core.cfchecks import check_valid -from xclim.core.indicator import Daily +from xclim.core.indicator import Daily, Indicator from xclim.core.units import declare_units from xclim.indices import base_flow_index, generic, rb_flashiness_index from xclim.indices.stats import fit as _fit @@ -49,17 +49,10 @@ class FA(Streamflow): # Disable the daily checks because the inputs are period extremas. -class Fit(Streamflow): - freq = None - missing = "at_least_n" - # Do not set `missing_options` so it can be overriden by `set_options`. +class Fit(Indicator): + src_freq = None - @staticmethod - def cfcheck(**das): - pass - - @staticmethod - def datacheck(**das): + def cfcheck(self, **das): pass @@ -111,8 +104,6 @@ def datacheck(**das): title="Distribution parameters fitted over the time dimension.", cell_methods="time: fit", compute=declare_units(da=None)(_fit), - missing="skip", - missing_options=None, ) diff --git a/xclim/indices/_anuclim.py b/xclim/indices/_anuclim.py index cbe7b82a7..17b3f3f4e 100644 --- a/xclim/indices/_anuclim.py +++ b/xclim/indices/_anuclim.py @@ -97,7 +97,9 @@ def isothermality( @declare_units(tas="[temperature]") -def temperature_seasonality(tas: xarray.DataArray) -> xarray.DataArray: +def temperature_seasonality( + tas: xarray.DataArray, freq: str = "YS" +) -> xarray.DataArray: r"""ANUCLIM temperature seasonality (coefficient of variation). The annual temperature coefficient of variation expressed in percent. Calculated as the standard deviation @@ -107,11 +109,14 @@ def temperature_seasonality(tas: xarray.DataArray) -> xarray.DataArray: ---------- tas : xarray.DataArray Mean temperature at daily, weekly, or monthly frequency. + freq : Returns ------- xarray.DataArray, [%] Mean temperature coefficient of variation + freq : str + Resampling frequency. Examples -------- @@ -136,16 +141,14 @@ def temperature_seasonality(tas: xarray.DataArray) -> xarray.DataArray: tas = convert_units_to(tas, "K") with xarray.set_options(keep_attrs=True): - seas = 100 * _anuclim_coeff_var(tas) + seas = 100 * _anuclim_coeff_var(tas, freq=freq) seas.attrs["units"] = "%" return seas @declare_units(pr="[precipitation]") -def precip_seasonality( - pr: xarray.DataArray, -) -> xarray.DataArray: +def precip_seasonality(pr: xarray.DataArray, freq: str = "YS") -> xarray.DataArray: r"""ANUCLIM Precipitation Seasonality (C of V). The annual precipitation Coefficient of Variation (C of V) expressed in percent. Calculated as the standard deviation @@ -156,6 +159,8 @@ def precip_seasonality( pr : xarray.DataArray Total precipitation rate at daily, weekly, or monthly frequency. Units need to be defined as a rate (e.g. mm d-1, mm week-1). + freq : str + Resampling frequency. Returns ------- @@ -190,7 +195,7 @@ def precip_seasonality( pr = convert_units_to(pr, "mm d-1") with xarray.set_options(keep_attrs=True): - seas = 100 * _anuclim_coeff_var(pr) + seas = 100 * _anuclim_coeff_var(pr, freq=freq) seas.attrs["units"] = "%" return seas @@ -456,10 +461,10 @@ def prcptot_wetdry_period( ) -def _anuclim_coeff_var(arr: xarray.DataArray) -> xarray.DataArray: +def _anuclim_coeff_var(arr: xarray.DataArray, freq: str = "YS") -> xarray.DataArray: """Calculate the annual coefficient of variation for ANUCLIM indices.""" - std = arr.resample(time="YS").std(dim="time") - mu = arr.resample(time="YS").mean(dim="time") + std = arr.resample(time=freq).std(dim="time") + mu = arr.resample(time=freq).mean(dim="time") return std / mu diff --git a/xclim/testing/tests/test_indicators.py b/xclim/testing/tests/test_indicators.py index 5a6019398..9a3dfd7da 100644 --- a/xclim/testing/tests/test_indicators.py +++ b/xclim/testing/tests/test_indicators.py @@ -127,7 +127,8 @@ def multioptvar_compute( return tas -multiOptVar = Daily( +multiOptVar = Indicator( + src_freq="D", realm="atmos", identifier="multiopt", cf_attrs=[dict(units="K")], @@ -569,7 +570,7 @@ def test_indicator_from_dict(): def test_indicator_errors(): - def func(data: xr.DataArray, thresh: str = "0 degC"): + def func(data: xr.DataArray, thresh: str = "0 degC", freq: str = "YS"): return data doc = [ @@ -583,6 +584,8 @@ def func(data: xr.DataArray, thresh: str = "0 degC"): " A variable.", " thresh : str", " A threshold", + " freq : str", + " The resampling frequency.", "", " Returns", " -------", @@ -611,7 +614,7 @@ def func(data: xr.DataArray, thresh: str = "0 degC"): d["identifier"] = "bad_indi" d["module"] = "test" - bad_doc = doc[:10] + [" extra: bool", " Woupsi"] + doc[10:] + bad_doc = doc[:12] + [" extra: bool", " Woupsi"] + doc[12:] func.__doc__ = "\n".join(bad_doc) with pytest.raises(ValueError, match="Malformed docstring"): Daily(**d) @@ -642,6 +645,25 @@ def func(data: xr.DataArray, thresh: str = "0 degC"): with pytest.raises(AttributeError, match="Indicator's realm must be given as one"): Daily(**d) + def func(data: xr.DataArray, thresh: str = "0 degC"): + return data + + func.__doc__ = "\n".join(doc[:10] + doc[12:]) + d = dict( + realm="atmos", + cf_attrs=dict( + var_name="tmean{threshold}", + units="K", + long_name="{freq} mean surface temperature", + standard_name="{freq} mean temperature", + cell_methods=[{"time": "mean within days"}], + ), + compute=func, + input={"data": "tas"}, + ) + with pytest.raises(ValueError, match="ResamplingIndicator require a 'freq'"): + ind = Daily(identifier="indi", module="test", **d) + def test_indicator_call_errors(tas_series): tas = tas_series(np.arange(730), start="2001-01-01") @@ -652,3 +674,14 @@ def test_indicator_call_errors(tas_series): with pytest.raises(TypeError, match="got an unexpected keyword argument 'oups'"): uniIndTemp(tas, oups=3) + + +def test_resamplingIndicator_new_error(): + with pytest.raises(ValueError, match="ResamplingIndicator require a 'freq'"): + Daily( + realm="atmos", + identifier="multiopt", + cf_attrs=[dict(units="K")], + module="test", + compute=multioptvar_compute, + ) From 02cdeab20bdd2dc01eb210620e3924be1d3ab613 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 17 Nov 2021 12:10:30 -0500 Subject: [PATCH 2/4] upd hist --- HISTORY.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 9fbb512f5..ed8477fc6 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,11 +9,13 @@ Contributors to this version: Pascal Bourgault (:user:`aulemahal`), Travis Logan New features and enhancements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Added an optimized pathway for ``xclim.indices.run_length`` functions when ``window=1``. (:pull:`911`, :issue:`910`). +* The data input frequency expected by ``Indicator``s is now in the ``src_freq`` attribute and is thus controlable by subclassing existing indicators. (:issue:`898`, :pull:`927`). Internal changes ~~~~~~~~~~~~~~~~ * Removed some logging configurations in ``dataflags`` that were polluting python's main logging configuration. (:pull:`909`). * Synchronized logging formatters in `xclim.ensembles` and `xclim.core.utils`. (:pull:`909`). +* Split of resampling-related functionality of ``Indicator``s into a new ``ResamplingIndicator`` subclass. The use of new (private) methods makes it easier to inject functionality in indicator subclasses. (:issue:`867`, :pull:`927`). Bug fixes ~~~~~~~~~ From dd52b356657a5efc8d73bf5ea3c81336438628e5 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 19 Nov 2021 11:25:30 -0500 Subject: [PATCH 3/4] Update xclim/core/indicator.py Co-authored-by: David Huard --- xclim/core/indicator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index ffd96ae9c..c2409f39f 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1279,7 +1279,7 @@ def __init__(self, **kwds): super().__init__(**kwds) def _preprocess_and_checks(self, das, params, indexer): - """Perform parent's checks and also checks if freq is allowed.""" + """Perform parent's checks and also check if freq is allowed.""" das, params, indexer = super()._preprocess_and_checks(das, params, indexer) # Check if the period is allowed: From eb114212ce77af45906b65004ef26542f405bfdc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 19 Nov 2021 11:53:12 -0500 Subject: [PATCH 4/4] Add a few semi-related tests --- xclim/core/datachecks.py | 14 ++++----- xclim/testing/tests/test_indicators.py | 40 ++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/xclim/core/datachecks.py b/xclim/core/datachecks.py index e4c9bbdca..cbb5273b3 100644 --- a/xclim/core/datachecks.py +++ b/xclim/core/datachecks.py @@ -35,23 +35,23 @@ def check_freq(var: xr.DataArray, freq: Union[str, Sequence[str]], strict: bool exp_base = [parse_offset(frq)[1] for frq in freq] v_freq = xr.infer_freq(var.time) if v_freq is None: - raise ValidationError("Unable to infer the frequency of the time series.") + raise ValidationError( + "Unable to infer the frequency of the time series. " + "To mute this, set xclim's option data_validation='log'." + ) v_base = parse_offset(v_freq)[1] if v_base not in exp_base or ( strict and all(compare_offsets(v_freq, "!=", frq) for frq in freq) ): raise ValidationError( - f"Frequency of time series not {'strictly' if strict else ''} in {freq}" + f"Frequency of time series not {'strictly' if strict else ''} in {freq}. " + "To mute this, set xclim's option data_validation='log'." ) -@datacheck def check_daily(var: xr.DataArray): """Raise an error if not series has a frequency other that daily, or is not monotonically increasing. Note that this does not check for gaps in the series. """ - if xr.infer_freq(var.time) != "D": - raise ValidationError( - "time series is not recognized as daily. You can quiet this error by setting `data_validation` to 'warn' or 'log', in `xclim.set_options`." - ) + return check_freq(var, "D") diff --git a/xclim/testing/tests/test_indicators.py b/xclim/testing/tests/test_indicators.py index 9a3dfd7da..9f59ddb85 100644 --- a/xclim/testing/tests/test_indicators.py +++ b/xclim/testing/tests/test_indicators.py @@ -252,6 +252,22 @@ def test_multiindicator(tas_series): assert tmin.attrs["description"] == "Grouped computation of tmax and tmin" assert tmax.attrs["description"] == "Grouped computation of tmax and tmin" + with pytest.raises(ValueError, match="Output #2 is missing a var_name!"): + ind = Daily( + realm="atmos", + identifier="minmaxtemp2", + cf_attrs=[ + dict( + var_name="tmin", + units="K", + ), + dict( + units="K", + ), + ], + compute=multitemp_compute, + ) + # Attrs passed as keywords - individually ind = Daily( realm="atmos", @@ -270,6 +286,30 @@ def test_multiindicator(tas_series): assert tmax.attrs["description"] == "Grouped computation of tmax and tmin" assert ind.units == ["K", "K"] + # All must be the same length + with pytest.raises(ValueError, match="Attribute var_name has 2 elements"): + ind = Daily( + realm="atmos", + identifier="minmaxtemp3", + var_name=["tmin", "tmax"], + units="K", + standard_name=["Min temp"], + description="Grouped computation of tmax and tmin", + compute=uniindpr_compute, + ) + + ind = Daily( + realm="atmos", + identifier="minmaxtemp4", + var_name=["tmin", "tmax"], + units="K", + standard_name=["Min temp", ""], + description="Grouped computation of tmax and tmin", + compute=uniindtemp_compute, + ) + with pytest.raises(ValueError, match="Indicator minmaxtemp4 was wrongly defined"): + tmin, tmax = ind(tas, freq="YS") + def test_missing(tas_series): a = tas_series(np.ones(365, float), start="1/1/2000")