From 70f3b87070575ebd9760e6e404581dfca742598b Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 11 May 2020 16:37:07 +0200 Subject: [PATCH 01/20] First attempte --- esmvalcore/_data_finder.py | 16 +++++++++++++--- esmvalcore/_recipe.py | 15 ++++++++------- esmvalcore/_recipe_checks.py | 2 +- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 7a27dee4f6..82ce815d39 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -93,19 +93,21 @@ def _replace_tags(path, variable): """Replace tags in the config-developer's file with actual values.""" path = path.strip('/') tlist = re.findall(r'{([^}]*)}', path) + if 'startdate' in variable: + path = re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path) + tlist.append('startdate') paths = [path] for tag in tlist: original_tag = tag tag, _, _ = _get_caps_options(tag) if tag == 'latestversion': # handled separately later - continue + continue if tag in variable: replacewith = variable[tag] else: raise KeyError("Dataset key {} must be specified for {}, check " "your recipe entry".format(tag, variable)) - paths = _replace_tag(paths, original_tag, replacewith) return paths @@ -233,8 +235,16 @@ def get_input_filelist(variable, rootpath, drs): if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': variable['ensemble'] = 'r0i0p0' (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) + if 'startdate' in variable: + # update start and end years, move to new function? + intervals = [get_start_end_year(name) for name in files] + variable['start_year'] = min(intervals)[0] + variable['end_year'] = max(intervals)[1] + # best way to write this? + variable['filename'] = re.sub('\d\d\d\d-\d\d\d\d', str(variable['start_year'])+'-'+str(variable['end_year']), variable['filename']) + # do time gating only for non-fx variables - if variable['frequency'] != 'fx': + if variable['frequency'] != 'fx' or 'startdate' not in variable: files = select_files(files, variable['start_year'], variable['end_year']) return (files, dirnames, filenames) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 27e988d514..a8f8925866 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1007,27 +1007,27 @@ def _initialize_datasets(raw_datasets): return datasets @staticmethod - def _expand_ensemble(variables): + def _expand_tag(variables, input_tag): """ - Expand ensemble members to multiple datasets. + Expand tags such as ensemble members or stardates to multiple datasets. Expansion only supports ensembles defined as strings, not lists. """ expanded = [] regex = re.compile(r'\(\d+:\d+\)') for variable in variables: - ensemble = variable.get('ensemble', "") - if not isinstance(ensemble, str): + tag = variable.get(input_tag, "") + if not isinstance(tag, str): expanded.append(variable) continue - match = regex.search(ensemble) + match = regex.search(tag) if not match: expanded.append(variable) continue start, end = match.group(0)[1:-1].split(':') for i in range(int(start), int(end) + 1): expand = deepcopy(variable) - expand['ensemble'] = regex.sub(str(i), ensemble, 1) + expand[input_tag] = regex.sub(str(i), tag, 1) expanded.append(expand) return expanded @@ -1071,7 +1071,8 @@ def _initialize_variables(self, raw_variable, raw_datasets): if activity: variable['activity'] = activity check.variable(variable, required_keys) - variables = self._expand_ensemble(variables) + variables = self._expand_tag(variables, 'ensemble') + variables = self._expand_tag(variables, 'startdate') return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index ddb2296e7f..8fbc0263dc 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -118,7 +118,7 @@ def data_availability(input_files, var, dirnames, filenames): raise RecipeError("Missing data") # check time avail only for non-fx variables - if var['frequency'] == 'fx': + if var['frequency'] == 'fx' or 'startdate' in var: return required_years = set(range(var['start_year'], var['end_year'] + 1)) From f5d4e05c8455878e670f0346a765b332eb00a03d Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 12 May 2020 11:41:25 +0200 Subject: [PATCH 02/20] Do not require start and end years, add them later --- esmvalcore/_data_finder.py | 17 ++++++++++------- esmvalcore/_recipe.py | 9 ++++++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 82ce815d39..de2000987d 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -219,6 +219,14 @@ def _get_filenames_glob(variable, drs): filenames_glob = _replace_tags(path_template, variable) return filenames_glob +def _update_output_file(variable, files): + intervals = [get_start_end_year(name) for name in files] + variable.update({'start_year': min(intervals)[0]}) + variable.update({'end_year': max(intervals)[1]}) + filename = variable['filename'].replace('.nc', '_{start_year}-{end_year}.nc'.format(**variable)) + variable['filename'] = filename + return variable + def _find_input_files(variable, rootpath, drs): input_dirs = _find_input_dirs(variable, rootpath, drs) @@ -236,12 +244,7 @@ def get_input_filelist(variable, rootpath, drs): variable['ensemble'] = 'r0i0p0' (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) if 'startdate' in variable: - # update start and end years, move to new function? - intervals = [get_start_end_year(name) for name in files] - variable['start_year'] = min(intervals)[0] - variable['end_year'] = max(intervals)[1] - # best way to write this? - variable['filename'] = re.sub('\d\d\d\d-\d\d\d\d', str(variable['start_year'])+'-'+str(variable['end_year']), variable['filename']) + variable = _update_output_file(variable, files) # do time gating only for non-fx variables if variable['frequency'] != 'fx' or 'startdate' not in variable: @@ -265,7 +268,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx': + if variable['frequency'] != 'fx' and 'startdate' not in variable: outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index a8f8925866..09390cae72 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1070,7 +1070,14 @@ def _initialize_variables(self, raw_variable, raw_datasets): activity = get_activity(variable) if activity: variable['activity'] = activity - check.variable(variable, required_keys) + if 'startdate' in variable: + startdate_keys = deepcopy(required_keys) + startdate_keys.update({'startdate'}) + startdate_keys.remove('start_year') + startdate_keys.remove('end_year') + check.variable(variable, startdate_keys) + else: + check.variable(variable, required_keys) variables = self._expand_tag(variables, 'ensemble') variables = self._expand_tag(variables, 'startdate') return variables From c73eb4ee6e1de893c8b47fd37af1f4434766f544 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 13 May 2020 11:43:51 +0200 Subject: [PATCH 03/20] Correct condition --- esmvalcore/_data_finder.py | 15 +++++++++------ esmvalcore/_recipe.py | 6 ++++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index de2000987d..f67540a1a4 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -102,7 +102,7 @@ def _replace_tags(path, variable): tag, _, _ = _get_caps_options(tag) if tag == 'latestversion': # handled separately later - continue + continue if tag in variable: replacewith = variable[tag] else: @@ -219,11 +219,14 @@ def _get_filenames_glob(variable, drs): filenames_glob = _replace_tags(path_template, variable) return filenames_glob + def _update_output_file(variable, files): intervals = [get_start_end_year(name) for name in files] variable.update({'start_year': min(intervals)[0]}) variable.update({'end_year': max(intervals)[1]}) - filename = variable['filename'].replace('.nc', '_{start_year}-{end_year}.nc'.format(**variable)) + filename = variable['filename'].replace( + '.nc', '_{start_year}-{end_year}.nc'.format(**variable) + ) variable['filename'] = filename return variable @@ -245,11 +248,11 @@ def get_input_filelist(variable, rootpath, drs): (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) if 'startdate' in variable: variable = _update_output_file(variable, files) - # do time gating only for non-fx variables - if variable['frequency'] != 'fx' or 'startdate' not in variable: - files = select_files(files, variable['start_year'], - variable['end_year']) + if variable['frequency'] != 'fx': + if 'startdate' not in variable: + files = select_files(files, variable['start_year'], + variable['end_year']) return (files, dirnames, filenames) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 09390cae72..dce43f178d 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1073,8 +1073,10 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'startdate' in variable: startdate_keys = deepcopy(required_keys) startdate_keys.update({'startdate'}) - startdate_keys.remove('start_year') - startdate_keys.remove('end_year') + if 'star_year' in variable: + startdate_keys.remove('start_year') + if 'end_year' in variable: + startdate_keys.remove('end_year') check.variable(variable, startdate_keys) else: check.variable(variable, required_keys) From 0d30f7aeb984bf08aa5cf07786fa58487ba2be8c Mon Sep 17 00:00:00 2001 From: sloosvel Date: Wed, 13 May 2020 12:03:20 +0200 Subject: [PATCH 04/20] Avoid key error in fx variables --- esmvalcore/_recipe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index dce43f178d..50bc76f98c 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1073,10 +1073,11 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'startdate' in variable: startdate_keys = deepcopy(required_keys) startdate_keys.update({'startdate'}) - if 'star_year' in variable: + try: startdate_keys.remove('start_year') - if 'end_year' in variable: startdate_keys.remove('end_year') + except KeyError: + continue check.variable(variable, startdate_keys) else: check.variable(variable, required_keys) From 1771c6b74dd66aa0fbd4c4ce0cb5be5a7546d76a Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 7 Sep 2020 17:20:24 +0200 Subject: [PATCH 05/20] Consider two possible paths --- esmvalcore/_data_finder.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index f67540a1a4..b28d1a9b3a 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -94,9 +94,13 @@ def _replace_tags(path, variable): path = path.strip('/') tlist = re.findall(r'{([^}]*)}', path) if 'startdate' in variable: - path = re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path) + paths = [ + re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path), + re.sub(r'({ensemble})', r'{startdate}-\1', path) + ] tlist.append('startdate') - paths = [path] + else: + paths = [path] for tag in tlist: original_tag = tag tag, _, _ = _get_caps_options(tag) @@ -246,13 +250,12 @@ def get_input_filelist(variable, rootpath, drs): if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': variable['ensemble'] = 'r0i0p0' (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) - if 'startdate' in variable: - variable = _update_output_file(variable, files) # do time gating only for non-fx variables if variable['frequency'] != 'fx': - if 'startdate' not in variable: - files = select_files(files, variable['start_year'], - variable['end_year']) + if 'startdate' in variable: + variable = _update_output_file(variable, files) + files = select_files(files, variable['start_year'], + variable['end_year']) return (files, dirnames, filenames) From 6ad31e5aca0b985b9902789896b0598ee7838df0 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 7 Sep 2020 17:45:25 +0200 Subject: [PATCH 06/20] Fix function name --- tests/unit/test_recipe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index 1ed1875926..c0e764a3bd 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -14,7 +14,7 @@ def test_expand_ensemble(self): }, ] - expanded = Recipe._expand_ensemble(datasets) + expanded = Recipe._expand_tag(datasets, 'ensemble') ensembles = [ 'r1i2p3', @@ -39,4 +39,4 @@ def test_expand_ensemble_nolist(self): ] with pytest.raises(RecipeError): - Recipe._expand_ensemble(datasets) + Recipe._expand_tag(datasets, 'ensembles') From 811026fc72317806b6cb7833b831d125a31130aa Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 7 Sep 2020 17:51:04 +0200 Subject: [PATCH 07/20] Fix variable name --- tests/unit/test_recipe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index c0e764a3bd..f1d73862b9 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -39,4 +39,4 @@ def test_expand_ensemble_nolist(self): ] with pytest.raises(RecipeError): - Recipe._expand_tag(datasets, 'ensembles') + Recipe._expand_tag(datasets, 'ensemble') From 96b236f4bead2fc309616ddfbbf39493bf9fb716 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Fri, 13 Nov 2020 14:32:59 +0100 Subject: [PATCH 08/20] Avoid duplicates in filename --- esmvalcore/_data_finder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index b28d1a9b3a..d9d55f397a 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -94,10 +94,10 @@ def _replace_tags(path, variable): path = path.strip('/') tlist = re.findall(r'{([^}]*)}', path) if 'startdate' in variable: - paths = [ + paths = list(set([ re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path), re.sub(r'({ensemble})', r'{startdate}-\1', path) - ] + ])) tlist.append('startdate') else: paths = [path] From ec10ee740559aecf04d2cb28f9f87052b3c0b3e0 Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Mon, 16 Nov 2020 11:52:45 +0100 Subject: [PATCH 09/20] Add test for startdate expansion --- tests/unit/test_recipe.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index f1d73862b9..dc64705473 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -29,6 +29,30 @@ def test_expand_ensemble(self): for i, ensemble in enumerate(ensembles): assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble} + def test_expand_startdate(self): + + datasets = [ + { + 'dataset': 'XYZ', + 'startdate': 's(1998:2005)', + }, + ] + + expanded = Recipe._expand_tag(datasets, 'startdate') + + startdates = [ + 's1998', + 's1999', + 's2000', + 's2001', + 's2002', + 's2003', + 's2004', + 's2005', + ] + for i, startdate in enumerate(startdates): + assert expanded[i] == {'dataset': 'XYZ', 'startdate': startdate} + def test_expand_ensemble_nolist(self): datasets = [ From 401abdcafee055f4bfb4d4c68940aae5527ad01b Mon Sep 17 00:00:00 2001 From: Javier Vegas-Regidor Date: Tue, 27 Apr 2021 11:46:43 +0200 Subject: [PATCH 10/20] Add test for the replace tags method --- tests/unit/data_finder/test_replace_tags.py | 55 +++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/unit/data_finder/test_replace_tags.py diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py new file mode 100644 index 0000000000..0b206952a4 --- /dev/null +++ b/tests/unit/data_finder/test_replace_tags.py @@ -0,0 +1,55 @@ +"""Unit tests for :func:`esmvalcore._data_finder.regrid._stock_cube`""" +from esmvalcore._data_finder import _replace_tags + +VARIABLE = { + 'project': 'CMIP6', + 'dataset': 'ACCURATE-MODEL', + 'activity': 'act', + 'exp': 'experiment', + 'institute': 'HMA', + 'ensemble': 'r1i1p1f1', + 'mip': 'Amon', + 'short_name': 'tas', + 'grid': 'gr', +} + + +def test_replace_tags(): + """Tests for get_start_end_year function.""" + path = _replace_tags( + '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' + '{grid}/{latestversion}', VARIABLE) + input_file = _replace_tags( + '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE) + output_file = _replace_tags( + '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE) + assert path == [ + 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' + '{latestversion}' + ] + assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc'] + assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas'] + + +def test_replace_tags_with_startdate(): + """Tests for get_start_end_year function.""" + variable = {'startdate': '199411', **VARIABLE} + path = _replace_tags( + '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' + '{grid}/{latestversion}', variable) + input_file = _replace_tags( + '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable) + output_file = _replace_tags( + '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable) + assert path == [ + 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' + '{latestversion}', + 'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/' + '{latestversion}' + ] + assert input_file == [ + 'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc' + ] + assert output_file == [ + 'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas' + ] From dbb4b07419b9fdfc69244c0be5c7b63252a8981e Mon Sep 17 00:00:00 2001 From: sloosvel Date: Thu, 29 Apr 2021 13:53:26 +0200 Subject: [PATCH 11/20] Rename tag --- esmvalcore/_data_finder.py | 12 ++++++------ esmvalcore/_recipe.py | 14 +++++++------- esmvalcore/_recipe_checks.py | 2 +- tests/unit/data_finder/test_replace_tags.py | 4 ++-- tests/unit/test_recipe.py | 13 +++++++------ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 9c690ea498..7bc6bc7a80 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -98,14 +98,14 @@ def _replace_tags(paths, variable): tlist = set() for path in paths: tlist = tlist.union(re.findall(r'{([^}]*)}', path)) - if 'startdate' in variable: + if 'sub_experiment' in variable: new_paths = [] for path in paths: new_paths.extend(( - re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path), - re.sub(r'({ensemble})', r'{startdate}-\1', path) + re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path), + re.sub(r'({ensemble})', r'{sub_experiment}-\1', path) )) - tlist.add('startdate') + tlist.add('sub_experiment') paths = new_paths logger.debug(tlist) @@ -266,7 +266,7 @@ def get_input_filelist(variable, rootpath, drs): (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': - if 'startdate' in variable: + if 'sub_experiment' in variable: variable = _update_output_file(variable, files) files = select_files(files, variable['start_year'], variable['end_year']) @@ -288,7 +288,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx' and 'startdate' not in variable: + if variable['frequency'] != 'fx' and 'sub_experiment' not in variable: outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index df81be33b0..2df191bd8b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1075,19 +1075,19 @@ def _initialize_variables(self, raw_variable, raw_datasets): activity = get_activity(variable) if activity: variable['activity'] = activity - if 'startdate' in variable: - startdate_keys = deepcopy(required_keys) - startdate_keys.update({'startdate'}) + if 'sub_experiment' in variable: + subexperiment_keys = deepcopy(required_keys) + subexperiment_keys.update({'sub_experiment'}) try: - startdate_keys.remove('start_year') - startdate_keys.remove('end_year') + subexperiment_keys.remove('start_year') + subexperiment_keys.remove('end_year') except KeyError: continue - check.variable(variable, startdate_keys) + check.variable(variable, subexperiment_keys) else: check.variable(variable, required_keys) variables = self._expand_tag(variables, 'ensemble') - variables = self._expand_tag(variables, 'startdate') + variables = self._expand_tag(variables, 'sub_experiment') return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index e5111e42bc..9b9b47f8eb 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames): f"Missing data for {var['alias']}: {var['short_name']}") # check time avail only for non-fx variables - if var['frequency'] == 'fx' or 'startdate' in var: + if var['frequency'] == 'fx' or 'sub_experiment' in var: return required_years = set(range(var['start_year'], var['end_year'] + 1)) diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py index febd0b47fe..097cc7b328 100644 --- a/tests/unit/data_finder/test_replace_tags.py +++ b/tests/unit/data_finder/test_replace_tags.py @@ -41,9 +41,9 @@ def test_replace_tags_list_of_str(): ]) -def test_replace_tags_with_startdate(): +def test_replace_tags_with_subexperiment(): """Tests for get_start_end_year function.""" - variable = {'startdate': '199411', **VARIABLE} + variable = {'sub_experiment': '199411', **VARIABLE} path = _replace_tags( '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' '{grid}/{latestversion}', variable) diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index dc64705473..bca2e0255b 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -29,18 +29,18 @@ def test_expand_ensemble(self): for i, ensemble in enumerate(ensembles): assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble} - def test_expand_startdate(self): + def test_expand_subexperiment(self): datasets = [ { 'dataset': 'XYZ', - 'startdate': 's(1998:2005)', + 'sub_experiment': 's(1998:2005)', }, ] - expanded = Recipe._expand_tag(datasets, 'startdate') + expanded = Recipe._expand_tag(datasets, 'sub_experiment') - startdates = [ + subexperiments = [ 's1998', 's1999', 's2000', @@ -50,8 +50,9 @@ def test_expand_startdate(self): 's2004', 's2005', ] - for i, startdate in enumerate(startdates): - assert expanded[i] == {'dataset': 'XYZ', 'startdate': startdate} + for i, subexperiment in enumerate(subexperiments): + assert expanded[i] == {'dataset': 'XYZ', + 'sub_experiment': subexperiment} def test_expand_ensemble_nolist(self): From 4296a74d6e3b93bae3a7b719e844a732002b89f2 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 3 May 2021 12:17:56 +0200 Subject: [PATCH 12/20] Add documentation --- doc/recipe/overview.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 7798878620..6080f9c2a4 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -82,10 +82,12 @@ data specifications: ``RCP8.5``) - mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``) - ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``) +- sub experiment (key `sub_experiment`, value e.g. `s2000`, `s(2000:2010)`, + for DCPP data only) - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers, so we recommend to avoid them. For example, use ``128`` to specify the year - 128 instead of ``0128``.) + 128 instead of ``0128``. The time range is not needed for DCPP data.) - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for CMIP6 data only). @@ -97,6 +99,7 @@ For example, a datasets section could be: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004} - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn} - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn} + - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn} It is possible to define the experiment as a list to concatenate two experiments. Here it is an example concatenating the `historical` experiment with `rcp85` @@ -114,7 +117,7 @@ In this case, the specified datasets are concatenated into a single cube: datasets: - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} -ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset. +ESMValTool also supports a simplified syntax to add multiple ensemble members and sub experiment ids from the same dataset. In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive), adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 you can use the following abbreviated syntax: @@ -123,6 +126,7 @@ you can use the following abbreviated syntax: datasets: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: "r(1:10)i1p1", start_year: 2001, end_year: 2004} + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn} It can be included multiple times in one definition. For example, to generate the datasets definitions for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i1p1 you can use: From af7c285c46a3a6cf3bf96419232ed0500b9305dc Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 10 May 2021 17:30:42 +0200 Subject: [PATCH 13/20] Allow to load subexps per timerange or as a whole --- doc/recipe/overview.rst | 18 ++++++++++++++---- esmvalcore/_data_finder.py | 4 ++-- esmvalcore/_recipe.py | 9 ++++----- esmvalcore/recipe_schema.yml | 1 + 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index 6080f9c2a4..fa71b7ccf4 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -87,7 +87,7 @@ data specifications: - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers, so we recommend to avoid them. For example, use ``128`` to specify the year - 128 instead of ``0128``. The time range is not needed for DCPP data.) + 128 instead of ``0128``.) - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for CMIP6 data only). @@ -99,7 +99,7 @@ For example, a datasets section could be: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004} - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn} - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn} - - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn} + - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year, 2002} It is possible to define the experiment as a list to concatenate two experiments. Here it is an example concatenating the `historical` experiment with `rcp85` @@ -117,9 +117,9 @@ In this case, the specified datasets are concatenated into a single cube: datasets: - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004} -ESMValTool also supports a simplified syntax to add multiple ensemble members and sub experiment ids from the same dataset. +ESMValTool also supports a simplified syntax to add multiple ensemble members. In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive), -adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 +adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 you can use the following abbreviated syntax: .. code-block:: yaml @@ -140,6 +140,16 @@ Please, bear in mind that this syntax can only be used in the ensemble tag. Also, note that the combination of multiple experiments and ensembles, like exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error. +The same simplified syntax can be used to add multiple sub-experiment ids in combination with the tag `all_years: True`. +This configuration will load all the available years for the sub-experiment, without having to specify +the `start_year` and `end_year` for each one of the ids: + +.. code-block:: yaml + + datasets: + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn, all_years: True} + + Note that this section is not required, as datasets can also be provided in the Diagnostics_ section. diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 7bc6bc7a80..ebd16b9973 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -266,7 +266,7 @@ def get_input_filelist(variable, rootpath, drs): (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': - if 'sub_experiment' in variable: + if 'all_years' in variable: variable = _update_output_file(variable, files) files = select_files(files, variable['start_year'], variable['end_year']) @@ -288,7 +288,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx' and 'sub_experiment' not in variable: + if variable['frequency'] != 'fx' and 'all_years' not in variable: outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 2df191bd8b..71ac3da6f8 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1078,11 +1078,10 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'sub_experiment' in variable: subexperiment_keys = deepcopy(required_keys) subexperiment_keys.update({'sub_experiment'}) - try: - subexperiment_keys.remove('start_year') - subexperiment_keys.remove('end_year') - except KeyError: - continue + if 'all_years' in variable: + if variable['all_years']: + subexperiment_keys.discard('start_year') + subexperiment_keys.discard('end_year') check.variable(variable, subexperiment_keys) else: check.variable(variable, required_keys) diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml index 266f8b9f63..a3d3255257 100644 --- a/esmvalcore/recipe_schema.yml +++ b/esmvalcore/recipe_schema.yml @@ -41,6 +41,7 @@ variable: alternative_dataset: str(required=False) fx_files: list(required=False) additional_datasets: list(include('dataset'), required=False) + all_years: bool(required=False) # TODO: add preprocessor item From 68ff78306ad5cc64fd7486f0ef411a17a703e6ff Mon Sep 17 00:00:00 2001 From: sloosvel Date: Mon, 10 May 2021 18:46:01 +0200 Subject: [PATCH 14/20] Fix condition --- esmvalcore/_recipe_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 9b9b47f8eb..81a539e9cd 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames): f"Missing data for {var['alias']}: {var['short_name']}") # check time avail only for non-fx variables - if var['frequency'] == 'fx' or 'sub_experiment' in var: + if var['frequency'] == 'fx' or 'all_years' in var: return required_years = set(range(var['start_year'], var['end_year'] + 1)) From a303e852e8e4fe6921a1afbe8d5826d1b97dc173 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 10:58:31 +0200 Subject: [PATCH 15/20] Remove 'all_years' functionality --- doc/recipe/overview.rst | 9 +++------ esmvalcore/_data_finder.py | 14 +------------- esmvalcore/_recipe.py | 4 ---- esmvalcore/_recipe_checks.py | 2 +- esmvalcore/recipe_schema.yml | 1 - 5 files changed, 5 insertions(+), 25 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index fa71b7ccf4..f7afc1a0ae 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -82,7 +82,7 @@ data specifications: ``RCP8.5``) - mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``) - ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``) -- sub experiment (key `sub_experiment`, value e.g. `s2000`, `s(2000:2010)`, +- sub-experiment id (key `sub_experiment`, value e.g. `s2000`, `s(2000:2002)`, for DCPP data only) - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers, @@ -126,7 +126,6 @@ you can use the following abbreviated syntax: datasets: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: "r(1:10)i1p1", start_year: 2001, end_year: 2004} - - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn} It can be included multiple times in one definition. For example, to generate the datasets definitions for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i1p1 you can use: @@ -140,14 +139,12 @@ Please, bear in mind that this syntax can only be used in the ensemble tag. Also, note that the combination of multiple experiments and ensembles, like exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error. -The same simplified syntax can be used to add multiple sub-experiment ids in combination with the tag `all_years: True`. -This configuration will load all the available years for the sub-experiment, without having to specify -the `start_year` and `end_year` for each one of the ids: +The same simplified syntax can be used to add multiple sub-experiment ids: .. code-block:: yaml datasets: - - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn, all_years: True} + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004} Note that this section is not required, as datasets can also be provided in the diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index ebd16b9973..a756279097 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -236,16 +236,6 @@ def _get_filenames_glob(variable, drs): return filenames_glob -def _update_output_file(variable, files): - intervals = [get_start_end_year(name) for name in files] - variable.update({'start_year': min(intervals)[0]}) - variable.update({'end_year': max(intervals)[1]}) - filename = variable['filename'].replace( - '.nc', '_{start_year}-{end_year}.nc'.format(**variable) - ) - variable['filename'] = filename - return variable - def _find_input_files(variable, rootpath, drs): short_name = variable['short_name'] @@ -266,8 +256,6 @@ def get_input_filelist(variable, rootpath, drs): (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': - if 'all_years' in variable: - variable = _update_output_file(variable, files) files = select_files(files, variable['start_year'], variable['end_year']) return (files, dirnames, filenames) @@ -288,7 +276,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx' and 'all_years' not in variable: + if variable['frequency'] != 'fx' not in variable: outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 71ac3da6f8..46e231038b 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1078,10 +1078,6 @@ def _initialize_variables(self, raw_variable, raw_datasets): if 'sub_experiment' in variable: subexperiment_keys = deepcopy(required_keys) subexperiment_keys.update({'sub_experiment'}) - if 'all_years' in variable: - if variable['all_years']: - subexperiment_keys.discard('start_year') - subexperiment_keys.discard('end_year') check.variable(variable, subexperiment_keys) else: check.variable(variable, required_keys) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 81a539e9cd..cd41e0f648 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames): f"Missing data for {var['alias']}: {var['short_name']}") # check time avail only for non-fx variables - if var['frequency'] == 'fx' or 'all_years' in var: + if var['frequency'] == 'fx' in var: return required_years = set(range(var['start_year'], var['end_year'] + 1)) diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml index a3d3255257..266f8b9f63 100644 --- a/esmvalcore/recipe_schema.yml +++ b/esmvalcore/recipe_schema.yml @@ -41,7 +41,6 @@ variable: alternative_dataset: str(required=False) fx_files: list(required=False) additional_datasets: list(include('dataset'), required=False) - all_years: bool(required=False) # TODO: add preprocessor item From 1737bcb33b2569bf28f140dc92d381e60b240dc3 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 11:26:19 +0200 Subject: [PATCH 16/20] Fix conditions --- esmvalcore/_data_finder.py | 2 +- esmvalcore/_recipe_checks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index a756279097..44f234c7c4 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -276,7 +276,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx' not in variable: + if variable['frequency'] != 'fx': outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index cd41e0f648..21cff6a4a9 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames): f"Missing data for {var['alias']}: {var['short_name']}") # check time avail only for non-fx variables - if var['frequency'] == 'fx' in var: + if var['frequency'] == 'fx': return required_years = set(range(var['start_year'], var['end_year'] + 1)) From e0f40cbe37f689e66fb748f400ff1c93e4fa01b4 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 11:26:54 +0200 Subject: [PATCH 17/20] Fix flake --- esmvalcore/_data_finder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 44f234c7c4..8c700b2210 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -236,7 +236,6 @@ def _get_filenames_glob(variable, drs): return filenames_glob - def _find_input_files(variable, rootpath, drs): short_name = variable['short_name'] variable['short_name'] = variable['original_short_name'] From 664e313e6d51e166eda7a3f13d90554d6f4b03db Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 14:31:40 +0200 Subject: [PATCH 18/20] Add all_years tag --- doc/recipe/overview.rst | 8 ++++++-- esmvalcore/_data_finder.py | 14 +++++++++++++- esmvalcore/_recipe.py | 4 ++++ esmvalcore/_recipe_checks.py | 2 +- esmvalcore/recipe_schema.yml | 2 +- 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index f7afc1a0ae..71cce9eab7 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -90,6 +90,8 @@ data specifications: 128 instead of ``0128``.) - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for CMIP6 data only). +- load all years (key-value ``all_years: True`` to load all the years available in + a particular dataset) For example, a datasets section could be: @@ -97,6 +99,7 @@ For example, a datasets section could be: datasets: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004} + - {dataset: ACCESS1-0, project: CMIP5, exp: historical, ensemble: r1i1p1, all_years: True} - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn} - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn} - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year, 2002} @@ -139,12 +142,13 @@ Please, bear in mind that this syntax can only be used in the ensemble tag. Also, note that the combination of multiple experiments and ensembles, like exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error. -The same simplified syntax can be used to add multiple sub-experiment ids: +The same simplified syntax can be used to add multiple sub-experiment ids, as well as in combination with the ``all_years: True`` tag: .. code-block:: yaml datasets: - - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004} + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: "s(2000:2002)", grid: gn, start_year: 2003, end_year: 2004} + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: "s(1980:1990)", grid: gn, all_years: True} Note that this section is not required, as datasets can also be provided in the diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index 8c700b2210..b2022e30b2 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -236,6 +236,16 @@ def _get_filenames_glob(variable, drs): return filenames_glob +def _update_output_file(variable, files): + intervals = [get_start_end_year(name) for name in files] + variable.update({'start_year': min(intervals)[0]}) + variable.update({'end_year': max(intervals)[1]}) + filename = variable['filename'].replace( + '.nc', '_{start_year}-{end_year}.nc'.format(**variable) + ) + variable['filename'] = filename + return variable + def _find_input_files(variable, rootpath, drs): short_name = variable['short_name'] variable['short_name'] = variable['original_short_name'] @@ -255,6 +265,8 @@ def get_input_filelist(variable, rootpath, drs): (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) # do time gating only for non-fx variables if variable['frequency'] != 'fx': + if 'all_years' in variable: + variable = _update_output_file(variable, files) files = select_files(files, variable['start_year'], variable['end_year']) return (files, dirnames, filenames) @@ -275,7 +287,7 @@ def get_output_file(variable, preproc_dir): variable['variable_group'], _replace_tags(cfg['output_file'], variable)[0], ) - if variable['frequency'] != 'fx': + if variable['frequency'] != 'fx' and 'all_years' not in variable: outfile += '_{start_year}-{end_year}'.format(**variable) outfile += '.nc' return outfile diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index bf536c84cc..52e6a51ba2 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -1105,6 +1105,10 @@ def _initialize_variables(self, raw_variable, raw_datasets): activity = get_activity(variable) if activity: variable['activity'] = activity + if 'all_years' in variable: + if variable['all_years']: + required_keys.discard('start_year') + required_keys.discard('end_year') if 'sub_experiment' in variable: subexperiment_keys = deepcopy(required_keys) subexperiment_keys.update({'sub_experiment'}) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 21cff6a4a9..81a539e9cd 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames): f"Missing data for {var['alias']}: {var['short_name']}") # check time avail only for non-fx variables - if var['frequency'] == 'fx': + if var['frequency'] == 'fx' or 'all_years' in var: return required_years = set(range(var['start_year'], var['end_year'] + 1)) diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml index 266f8b9f63..186a4976ba 100644 --- a/esmvalcore/recipe_schema.yml +++ b/esmvalcore/recipe_schema.yml @@ -41,7 +41,7 @@ variable: alternative_dataset: str(required=False) fx_files: list(required=False) additional_datasets: list(include('dataset'), required=False) - + all_years: bool(required=False) # TODO: add preprocessor item diagnostic: From 3b01f8b50a9886ff1164fa6be5ceb64849cde223 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 14:32:06 +0200 Subject: [PATCH 19/20] Add_tests --- tests/integration/test_recipe.py | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 81e3ef7720..14c4d36b63 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -632,6 +632,60 @@ def test_empty_variable(tmp_path, patched_datafinder, config_user): assert product.attributes['dataset'] == 'CanESM2' +def test_all_years_tag(tmp_path, patched_datafinder, config_user): + """Test all_years tag for time-dependent variables.""" + content = dedent(""" + diagnostics: + diagnostic_name: + additional_datasets: + - dataset: CanESM2 + project: CMIP5 + mip: Amon + exp: historical + all_years: True + ensemble: r1i1p1 + variables: + pr: + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert len(task.products) == 1 + product = task.products.pop() + assert product.attributes['short_name'] == 'pr' + assert product.attributes['dataset'] == 'CanESM2' + assert '1990-2019' in product.filename + + +def test_fx_all_years_tag(tmp_path, patched_datafinder, config_user): + """Test all_years tag does not break time-independent variables.""" + content = dedent(""" + diagnostics: + diagnostic_name: + additional_datasets: + - dataset: CanESM2 + project: CMIP5 + mip: fx + exp: historical + all_years: True + ensemble: r1i1p1 + variables: + areacella: + scripts: null + """) + + recipe = get_recipe(tmp_path, content, config_user) + assert len(recipe.tasks) == 1 + task = recipe.tasks.pop() + assert len(task.products) == 1 + product = task.products.pop() + assert product.attributes['short_name'] == 'pr' + assert product.attributes['dataset'] == 'CanESM2' + assert '1990-2019' not in product.filename + + def test_cmip3_variable_autocomplete(tmp_path, patched_datafinder, config_user): """Test that required information is automatically added for CMIP5.""" From 0f0ddf503cc7798c84ccfd16c6e8dad3ea895d40 Mon Sep 17 00:00:00 2001 From: sloosvel Date: Tue, 11 May 2021 14:43:32 +0200 Subject: [PATCH 20/20] Fix tests --- esmvalcore/_data_finder.py | 1 + tests/integration/test_recipe.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index b2022e30b2..ebd16b9973 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -246,6 +246,7 @@ def _update_output_file(variable, files): variable['filename'] = filename return variable + def _find_input_files(variable, rootpath, drs): short_name = variable['short_name'] variable['short_name'] = variable['original_short_name'] diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 14c4d36b63..4efc58c042 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -681,7 +681,7 @@ def test_fx_all_years_tag(tmp_path, patched_datafinder, config_user): task = recipe.tasks.pop() assert len(task.products) == 1 product = task.products.pop() - assert product.attributes['short_name'] == 'pr' + assert product.attributes['short_name'] == 'areacella' assert product.attributes['dataset'] == 'CanESM2' assert '1990-2019' not in product.filename