Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions doc/recipe/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ data specifications:
``RCP8.5``)
- mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``)
- ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``)
- sub-experiment id (key ``sub_experiment``, value e.g. ``s2000``, ``s(2000:2002)``,
for DCPP data only)
- time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please
note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers,
so we recommend to avoid them. For example, use ``128`` to specify the year
Expand All @@ -97,6 +99,7 @@ For example, a datasets section could be:
- {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004}
- {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn}
- {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn}
- {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year: 2002}

It is possible to define the experiment as a list to concatenate two experiments.
Here is an example concatenating the `historical` experiment with `rcp85`
Expand Down Expand Up @@ -136,6 +139,14 @@ Please, bear in mind that this syntax can only be used in the ensemble tag.
Also, note that the combination of multiple experiments and ensembles, like
exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error.

The same simplified syntax can be used to add multiple sub-experiment ids:

.. code-block:: yaml

datasets:
- {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004}


Note that this section is not required, as datasets can also be provided in the
Diagnostics_ section.

Expand Down
17 changes: 12 additions & 5 deletions esmvalcore/_data_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,21 @@ def select_files(filenames, start_year, end_year):
def _replace_tags(paths, variable):
"""Replace tags in the config-developer's file with actual values."""
if isinstance(paths, str):
paths = (paths.strip('/'), )
paths = set((paths.strip('/'),))
else:
paths = [path.strip('/') for path in paths]
paths = set(path.strip('/') for path in paths)
tlist = set()

for path in paths:
tlist = tlist.union(re.findall(r'{([^}]*)}', path))
if 'sub_experiment' in variable:
new_paths = []
for path in paths:
new_paths.extend((
re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path),
re.sub(r'({ensemble})', r'{sub_experiment}-\1', path)
))
tlist.add('sub_experiment')
paths = new_paths
logger.debug(tlist)

for tag in tlist:
Expand All @@ -112,7 +120,6 @@ def _replace_tags(paths, variable):
else:
raise KeyError("Dataset key {} must be specified for {}, check "
"your recipe entry".format(tag, variable))

paths = _replace_tag(paths, original_tag, replacewith)
return paths

Expand All @@ -127,7 +134,7 @@ def _replace_tag(paths, tag, replacewith):
else:
text = _apply_caps(str(replacewith), lower, upper)
result.extend(p.replace('{' + tag + '}', text) for p in paths)
return result
return list(set(result))


def _get_caps_options(tag):
Expand Down
37 changes: 22 additions & 15 deletions esmvalcore/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,37 +1027,38 @@ def _initialize_datasets(raw_datasets):
return datasets

@staticmethod
def _expand_ensemble(variables):
"""Expand ensemble members to multiple datasets.
def _expand_tag(variables, input_tag):
"""
Expand tags such as ensemble members or stardates to multiple datasets.

Expansion only supports ensembles defined as strings, not lists.
"""
expanded = []
regex = re.compile(r'\(\d+:\d+\)')

def expand_ensemble(variable):
ens = variable.get('ensemble', "")
match = regex.search(ens)
def expand_tag(variable, input_tag):
tag = variable.get(input_tag, "")
match = regex.search(tag)
if match:
start, end = match.group(0)[1:-1].split(':')
for i in range(int(start), int(end) + 1):
expand = deepcopy(variable)
expand['ensemble'] = regex.sub(str(i), ens, 1)
expand_ensemble(expand)
expand[input_tag] = regex.sub(str(i), tag, 1)
expand_tag(expand, input_tag)
else:
expanded.append(variable)

for variable in variables:
ensemble = variable.get('ensemble', "")
if isinstance(ensemble, (list, tuple)):
for elem in ensemble:
tag = variable.get(input_tag, "")
if isinstance(tag, (list, tuple)):
for elem in tag:
if regex.search(elem):
raise RecipeError(
f"In variable {variable}: ensemble expansion "
"cannot be combined with ensemble lists")
f"In variable {variable}: {input_tag} expansion "
f"cannot be combined with {input_tag} lists")
expanded.append(variable)
else:
expand_ensemble(variable)
expand_tag(variable, input_tag)

return expanded

Expand Down Expand Up @@ -1104,8 +1105,14 @@ def _initialize_variables(self, raw_variable, raw_datasets):
activity = get_activity(variable)
if activity:
variable['activity'] = activity
check.variable(variable, required_keys)
variables = self._expand_ensemble(variables)
if 'sub_experiment' in variable:
subexperiment_keys = deepcopy(required_keys)
subexperiment_keys.update({'sub_experiment'})
check.variable(variable, subexperiment_keys)
else:
check.variable(variable, required_keys)
variables = self._expand_tag(variables, 'ensemble')
variables = self._expand_tag(variables, 'sub_experiment')
return variables

def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,
Expand Down
65 changes: 54 additions & 11 deletions tests/unit/data_finder/test_replace_tags.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,65 @@
"""Tests for _replace_tags in _data_finder.py."""

from esmvalcore._data_finder import _replace_tags

VARIABLE = {
'project': 'CMIP6',
'dataset': 'ACCURATE-MODEL',
'activity': 'act',
'exp': 'experiment',
'institute': 'HMA',
'ensemble': 'r1i1p1f1',
'mip': 'Amon',
'short_name': 'tas',
'grid': 'gr',
}


def test_replace_tags_str():
assert _replace_tags('folder/subfolder/{short_name}',
VARIABLE) == ['folder/subfolder/tas']
def test_replace_tags():
    """Check that _replace_tags fills directory and file templates."""
    path = _replace_tags(
        '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/'
        '{grid}/{latestversion}', VARIABLE)
    input_file = _replace_tags(
        '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE)
    output_file = _replace_tags(
        '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE)
    # The {latestversion} placeholder is expected to be left in place.
    assert path == [
        'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/'
        '{latestversion}'
    ]
    assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc']
    assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas']


def test_replace_tags_list_of_str():
assert _replace_tags(('folder/subfolder/{short_name}',
'folder2/{short_name}', 'subfolder/{short_name}'),
VARIABLE) == [
'folder/subfolder/tas',
'folder2/tas',
'subfolder/tas',
]
assert sorted(
_replace_tags(('folder/subfolder/{short_name}', 'folder2/{short_name}',
'subfolder/{short_name}'), VARIABLE)) == sorted([
'folder2/tas',
'folder/subfolder/tas',
'subfolder/tas',
])


def test_replace_tags_with_subexperiment():
    """Check _replace_tags when the variable defines a sub_experiment."""
    variable = {'sub_experiment': '199411', **VARIABLE}
    path = _replace_tags(
        '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/'
        '{grid}/{latestversion}', variable)
    input_file = _replace_tags(
        '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable)
    output_file = _replace_tags(
        '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable)
    # The directory template is expected to yield two candidates, with and
    # without the sub_experiment prefix; the order is not asserted, hence
    # the sorted() comparison.
    assert sorted(path) == sorted([
        'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/'
        '{latestversion}',
        'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/'
        '{latestversion}'
    ])
    # File name templates are expected to yield only the prefixed form.
    assert input_file == [
        'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc'
    ]
    assert output_file == [
        'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas'
    ]
29 changes: 27 additions & 2 deletions tests/unit/test_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_expand_ensemble(self):
},
]

expanded = Recipe._expand_ensemble(datasets)
expanded = Recipe._expand_tag(datasets, 'ensemble')

ensembles = [
'r1i2p3',
Expand All @@ -29,6 +29,31 @@ def test_expand_ensemble(self):
for i, ensemble in enumerate(ensembles):
assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble}

def test_expand_subexperiment(self):
    """Check that an ``s(start:end)`` sub_experiment range is expanded."""
    datasets = [
        {
            'dataset': 'XYZ',
            'sub_experiment': 's(1998:2005)',
        },
    ]

    expanded = Recipe._expand_tag(datasets, 'sub_experiment')

    # Compare the whole list rather than indexing entry by entry, so that
    # surplus expanded datasets fail the test as well as missing ones.
    assert expanded == [
        {'dataset': 'XYZ', 'sub_experiment': f's{value}'}
        for value in range(1998, 2006)
    ]

def test_expand_ensemble_nolist(self):

datasets = [
Expand All @@ -39,7 +64,7 @@ def test_expand_ensemble_nolist(self):
]

with pytest.raises(RecipeError):
Recipe._expand_ensemble(datasets)
Recipe._expand_tag(datasets, 'ensemble')


VAR_A = {'dataset': 'A'}
Expand Down