From 70f3b87070575ebd9760e6e404581dfca742598b Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 11 May 2020 16:37:07 +0200
Subject: [PATCH 01/20] First attempt

---
 esmvalcore/_data_finder.py   | 16 +++++++++++++---
 esmvalcore/_recipe.py        | 15 ++++++++-------
 esmvalcore/_recipe_checks.py |  2 +-
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 7a27dee4f6..82ce815d39 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -93,19 +93,21 @@ def _replace_tags(path, variable):
     """Replace tags in the config-developer's file with actual values."""
     path = path.strip('/')
     tlist = re.findall(r'{([^}]*)}', path)
+    if 'startdate' in variable:
+        path = re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path)
+        tlist.append('startdate')
     paths = [path]
     for tag in tlist:
         original_tag = tag
         tag, _, _ = _get_caps_options(tag)
 
         if tag == 'latestversion':  # handled separately later
-            continue
+            continue   
         if tag in variable:
             replacewith = variable[tag]
         else:
             raise KeyError("Dataset key {} must be specified for {}, check "
                            "your recipe entry".format(tag, variable))
-
         paths = _replace_tag(paths, original_tag, replacewith)
     return paths
 
@@ -233,8 +235,16 @@ def get_input_filelist(variable, rootpath, drs):
     if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx':
         variable['ensemble'] = 'r0i0p0'
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
+    if 'startdate' in variable:
+        # update start and end years, move to new function?
+        intervals = [get_start_end_year(name) for name in files]
+        variable['start_year'] = min(intervals)[0]
+        variable['end_year'] = max(intervals)[1]
+        # best way to write this?
+        variable['filename'] = re.sub('\d\d\d\d-\d\d\d\d', str(variable['start_year'])+'-'+str(variable['end_year']), variable['filename'])
+          
     # do time gating only for non-fx variables
-    if variable['frequency'] != 'fx':
+    if variable['frequency'] != 'fx' or 'startdate' not in variable:
         files = select_files(files, variable['start_year'],
                              variable['end_year'])
     return (files, dirnames, filenames)
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 27e988d514..a8f8925866 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1007,27 +1007,27 @@ def _initialize_datasets(raw_datasets):
         return datasets
 
     @staticmethod
-    def _expand_ensemble(variables):
+    def _expand_tag(variables, input_tag):
         """
-        Expand ensemble members to multiple datasets.
+        Expand tags such as ensemble members or stardates to multiple datasets.
 
         Expansion only supports ensembles defined as strings, not lists.
         """
         expanded = []
         regex = re.compile(r'\(\d+:\d+\)')
         for variable in variables:
-            ensemble = variable.get('ensemble', "")
-            if not isinstance(ensemble, str):
+            tag = variable.get(input_tag, "")
+            if not isinstance(tag, str):
                 expanded.append(variable)
                 continue
-            match = regex.search(ensemble)
+            match = regex.search(tag)
             if not match:
                 expanded.append(variable)
                 continue
             start, end = match.group(0)[1:-1].split(':')
             for i in range(int(start), int(end) + 1):
                 expand = deepcopy(variable)
-                expand['ensemble'] = regex.sub(str(i), ensemble, 1)
+                expand[input_tag] = regex.sub(str(i), tag, 1)
                 expanded.append(expand)
         return expanded
 
@@ -1071,7 +1071,8 @@ def _initialize_variables(self, raw_variable, raw_datasets):
                 if activity:
                     variable['activity'] = activity
             check.variable(variable, required_keys)
-        variables = self._expand_ensemble(variables)
+        variables = self._expand_tag(variables, 'ensemble')
+        variables = self._expand_tag(variables, 'startdate')
         return variables
 
     def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,
diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index ddb2296e7f..8fbc0263dc 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -118,7 +118,7 @@ def data_availability(input_files, var, dirnames, filenames):
         raise RecipeError("Missing data")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx':
+    if var['frequency'] == 'fx' or 'startdate' in var:
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))

From f5d4e05c8455878e670f0346a765b332eb00a03d Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 12 May 2020 11:41:25 +0200
Subject: [PATCH 02/20] Do not require start and end years, add them later

---
 esmvalcore/_data_finder.py | 17 ++++++++++-------
 esmvalcore/_recipe.py      |  9 ++++++++-
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 82ce815d39..de2000987d 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -219,6 +219,14 @@ def _get_filenames_glob(variable, drs):
     filenames_glob = _replace_tags(path_template, variable)
     return filenames_glob
 
+def _update_output_file(variable, files):
+    intervals = [get_start_end_year(name) for name in files]
+    variable.update({'start_year': min(intervals)[0]})
+    variable.update({'end_year': max(intervals)[1]})
+    filename = variable['filename'].replace('.nc', '_{start_year}-{end_year}.nc'.format(**variable))
+    variable['filename'] = filename
+    return variable
+
 
 def _find_input_files(variable, rootpath, drs):
     input_dirs = _find_input_dirs(variable, rootpath, drs)
@@ -236,12 +244,7 @@ def get_input_filelist(variable, rootpath, drs):
         variable['ensemble'] = 'r0i0p0'
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     if 'startdate' in variable:
-        # update start and end years, move to new function?
-        intervals = [get_start_end_year(name) for name in files]
-        variable['start_year'] = min(intervals)[0]
-        variable['end_year'] = max(intervals)[1]
-        # best way to write this?
-        variable['filename'] = re.sub('\d\d\d\d-\d\d\d\d', str(variable['start_year'])+'-'+str(variable['end_year']), variable['filename'])
+        variable = _update_output_file(variable, files)
           
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx' or 'startdate' not in variable:
@@ -265,7 +268,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx':
+    if variable['frequency'] != 'fx' and 'startdate' not in variable:
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index a8f8925866..09390cae72 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1070,7 +1070,14 @@ def _initialize_variables(self, raw_variable, raw_datasets):
                 activity = get_activity(variable)
                 if activity:
                     variable['activity'] = activity
-            check.variable(variable, required_keys)
+            if 'startdate' in variable:
+                startdate_keys = deepcopy(required_keys)
+                startdate_keys.update({'startdate'})
+                startdate_keys.remove('start_year')
+                startdate_keys.remove('end_year')
+                check.variable(variable, startdate_keys)
+            else:
+                check.variable(variable, required_keys)
         variables = self._expand_tag(variables, 'ensemble')
         variables = self._expand_tag(variables, 'startdate')
         return variables

From c73eb4ee6e1de893c8b47fd37af1f4434766f544 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Wed, 13 May 2020 11:43:51 +0200
Subject: [PATCH 03/20] Correct condition

---
 esmvalcore/_data_finder.py | 15 +++++++++------
 esmvalcore/_recipe.py      |  6 ++++--
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index de2000987d..f67540a1a4 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -102,7 +102,7 @@ def _replace_tags(path, variable):
         tag, _, _ = _get_caps_options(tag)
 
         if tag == 'latestversion':  # handled separately later
-            continue   
+            continue
         if tag in variable:
             replacewith = variable[tag]
         else:
@@ -219,11 +219,14 @@ def _get_filenames_glob(variable, drs):
     filenames_glob = _replace_tags(path_template, variable)
     return filenames_glob
 
+
 def _update_output_file(variable, files):
     intervals = [get_start_end_year(name) for name in files]
     variable.update({'start_year': min(intervals)[0]})
     variable.update({'end_year': max(intervals)[1]})
-    filename = variable['filename'].replace('.nc', '_{start_year}-{end_year}.nc'.format(**variable))
+    filename = variable['filename'].replace(
+        '.nc', '_{start_year}-{end_year}.nc'.format(**variable)
+    )
     variable['filename'] = filename
     return variable
 
@@ -245,11 +248,11 @@ def get_input_filelist(variable, rootpath, drs):
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     if 'startdate' in variable:
         variable = _update_output_file(variable, files)
-          
     # do time gating only for non-fx variables
-    if variable['frequency'] != 'fx' or 'startdate' not in variable:
-        files = select_files(files, variable['start_year'],
-                             variable['end_year'])
+    if variable['frequency'] != 'fx':
+        if 'startdate' not in variable:
+            files = select_files(files, variable['start_year'],
+                                 variable['end_year'])
     return (files, dirnames, filenames)
 
 
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 09390cae72..dce43f178d 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1073,8 +1073,10 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             if 'startdate' in variable:
                 startdate_keys = deepcopy(required_keys)
                 startdate_keys.update({'startdate'})
-                startdate_keys.remove('start_year')
-                startdate_keys.remove('end_year')
+                if 'star_year' in variable:
+                    startdate_keys.remove('start_year')
+                if 'end_year' in variable:
+                    startdate_keys.remove('end_year')
                 check.variable(variable, startdate_keys)
             else:
                 check.variable(variable, required_keys)

From 0d30f7aeb984bf08aa5cf07786fa58487ba2be8c Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Wed, 13 May 2020 12:03:20 +0200
Subject: [PATCH 04/20] Avoid key error in fx variables

---
 esmvalcore/_recipe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index dce43f178d..50bc76f98c 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1073,10 +1073,11 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             if 'startdate' in variable:
                 startdate_keys = deepcopy(required_keys)
                 startdate_keys.update({'startdate'})
-                if 'star_year' in variable:
+                try:
                     startdate_keys.remove('start_year')
-                if 'end_year' in variable:
                     startdate_keys.remove('end_year')
+                except KeyError:
+                    continue
                 check.variable(variable, startdate_keys)
             else:
                 check.variable(variable, required_keys)

From 1771c6b74dd66aa0fbd4c4ce0cb5be5a7546d76a Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 7 Sep 2020 17:20:24 +0200
Subject: [PATCH 05/20] Consider two possible paths

---
 esmvalcore/_data_finder.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index f67540a1a4..b28d1a9b3a 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -94,9 +94,13 @@ def _replace_tags(path, variable):
     path = path.strip('/')
     tlist = re.findall(r'{([^}]*)}', path)
     if 'startdate' in variable:
-        path = re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path)
+        paths = [
+            re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path),
+            re.sub(r'({ensemble})', r'{startdate}-\1', path)
+            ]
         tlist.append('startdate')
-    paths = [path]
+    else:
+        paths = [path]
     for tag in tlist:
         original_tag = tag
         tag, _, _ = _get_caps_options(tag)
@@ -246,13 +250,12 @@ def get_input_filelist(variable, rootpath, drs):
     if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx':
         variable['ensemble'] = 'r0i0p0'
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
-    if 'startdate' in variable:
-        variable = _update_output_file(variable, files)
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx':
-        if 'startdate' not in variable:
-            files = select_files(files, variable['start_year'],
-                                 variable['end_year'])
+        if 'startdate' in variable:
+            variable = _update_output_file(variable, files)
+        files = select_files(files, variable['start_year'],
+                             variable['end_year'])
     return (files, dirnames, filenames)
 
 

From 6ad31e5aca0b985b9902789896b0598ee7838df0 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 7 Sep 2020 17:45:25 +0200
Subject: [PATCH 06/20] Fix function name

---
 tests/unit/test_recipe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py
index 1ed1875926..c0e764a3bd 100644
--- a/tests/unit/test_recipe.py
+++ b/tests/unit/test_recipe.py
@@ -14,7 +14,7 @@ def test_expand_ensemble(self):
             },
         ]
 
-        expanded = Recipe._expand_ensemble(datasets)
+        expanded = Recipe._expand_tag(datasets, 'ensemble')
 
         ensembles = [
             'r1i2p3',
@@ -39,4 +39,4 @@ def test_expand_ensemble_nolist(self):
         ]
 
         with pytest.raises(RecipeError):
-            Recipe._expand_ensemble(datasets)
+            Recipe._expand_tag(datasets, 'ensembles')

From 811026fc72317806b6cb7833b831d125a31130aa Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 7 Sep 2020 17:51:04 +0200
Subject: [PATCH 07/20] Fix variable name

---
 tests/unit/test_recipe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py
index c0e764a3bd..f1d73862b9 100644
--- a/tests/unit/test_recipe.py
+++ b/tests/unit/test_recipe.py
@@ -39,4 +39,4 @@ def test_expand_ensemble_nolist(self):
         ]
 
         with pytest.raises(RecipeError):
-            Recipe._expand_tag(datasets, 'ensembles')
+            Recipe._expand_tag(datasets, 'ensemble')

From 96b236f4bead2fc309616ddfbbf39493bf9fb716 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Fri, 13 Nov 2020 14:32:59 +0100
Subject: [PATCH 08/20] Avoid duplicates in filename

---
 esmvalcore/_data_finder.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index b28d1a9b3a..d9d55f397a 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -94,10 +94,10 @@ def _replace_tags(path, variable):
     path = path.strip('/')
     tlist = re.findall(r'{([^}]*)}', path)
     if 'startdate' in variable:
-        paths = [
+        paths = list(set([
             re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path),
             re.sub(r'({ensemble})', r'{startdate}-\1', path)
-            ]
+            ]))
         tlist.append('startdate')
     else:
         paths = [path]

From ec10ee740559aecf04d2cb28f9f87052b3c0b3e0 Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Mon, 16 Nov 2020 11:52:45 +0100
Subject: [PATCH 09/20] Add test for startdate expansion

---
 tests/unit/test_recipe.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py
index f1d73862b9..dc64705473 100644
--- a/tests/unit/test_recipe.py
+++ b/tests/unit/test_recipe.py
@@ -29,6 +29,30 @@ def test_expand_ensemble(self):
         for i, ensemble in enumerate(ensembles):
             assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble}
 
+    def test_expand_startdate(self):
+
+        datasets = [
+            {
+                'dataset': 'XYZ',
+                'startdate': 's(1998:2005)',
+            },
+        ]
+
+        expanded = Recipe._expand_tag(datasets, 'startdate')
+
+        startdates = [
+            's1998',
+            's1999',
+            's2000',
+            's2001',
+            's2002',
+            's2003',
+            's2004',
+            's2005',
+        ]
+        for i, startdate in enumerate(startdates):
+            assert expanded[i] == {'dataset': 'XYZ', 'startdate': startdate}
+
     def test_expand_ensemble_nolist(self):
 
         datasets = [

From 401abdcafee055f4bfb4d4c68940aae5527ad01b Mon Sep 17 00:00:00 2001
From: Javier Vegas-Regidor <javier.vegas@bsc.es>
Date: Tue, 27 Apr 2021 11:46:43 +0200
Subject: [PATCH 10/20] Add test for the replace tags method

---
 tests/unit/data_finder/test_replace_tags.py | 55 +++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/unit/data_finder/test_replace_tags.py

diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py
new file mode 100644
index 0000000000..0b206952a4
--- /dev/null
+++ b/tests/unit/data_finder/test_replace_tags.py
@@ -0,0 +1,55 @@
+"""Unit tests for :func:`esmvalcore._data_finder.regrid._stock_cube`"""
+from esmvalcore._data_finder import _replace_tags
+
+VARIABLE = {
+    'project': 'CMIP6',
+    'dataset': 'ACCURATE-MODEL',
+    'activity': 'act',
+    'exp': 'experiment',
+    'institute': 'HMA',
+    'ensemble': 'r1i1p1f1',
+    'mip': 'Amon',
+    'short_name': 'tas',
+    'grid': 'gr',
+}
+
+
+def test_replace_tags():
+    """Tests for get_start_end_year function."""
+    path = _replace_tags(
+        '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/'
+        '{grid}/{latestversion}', VARIABLE)
+    input_file = _replace_tags(
+        '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE)
+    output_file = _replace_tags(
+        '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE)
+    assert path == [
+        'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/'
+        '{latestversion}'
+    ]
+    assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc']
+    assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas']
+
+
+def test_replace_tags_with_startdate():
+    """Tests for get_start_end_year function."""
+    variable = {'startdate': '199411', **VARIABLE}
+    path = _replace_tags(
+        '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/'
+        '{grid}/{latestversion}', variable)
+    input_file = _replace_tags(
+        '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable)
+    output_file = _replace_tags(
+        '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable)
+    assert path == [
+        'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/'
+        '{latestversion}',
+        'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/'
+        '{latestversion}'
+    ]
+    assert input_file == [
+        'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc'
+    ]
+    assert output_file == [
+        'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas'
+    ]

From dbb4b07419b9fdfc69244c0be5c7b63252a8981e Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Thu, 29 Apr 2021 13:53:26 +0200
Subject: [PATCH 11/20] Rename tag

---
 esmvalcore/_data_finder.py                  | 12 ++++++------
 esmvalcore/_recipe.py                       | 14 +++++++-------
 esmvalcore/_recipe_checks.py                |  2 +-
 tests/unit/data_finder/test_replace_tags.py |  4 ++--
 tests/unit/test_recipe.py                   | 13 +++++++------
 5 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 9c690ea498..7bc6bc7a80 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -98,14 +98,14 @@ def _replace_tags(paths, variable):
     tlist = set()
     for path in paths:
         tlist = tlist.union(re.findall(r'{([^}]*)}', path))
-    if 'startdate' in variable:
+    if 'sub_experiment' in variable:
         new_paths = []
         for path in paths:
             new_paths.extend((
-                re.sub(r'(\b{ensemble}\b)', r'{startdate}-\1', path),
-                re.sub(r'({ensemble})', r'{startdate}-\1', path)
+                re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path),
+                re.sub(r'({ensemble})', r'{sub_experiment}-\1', path)
             ))
-            tlist.add('startdate')
+            tlist.add('sub_experiment')
         paths = new_paths
     logger.debug(tlist)
 
@@ -266,7 +266,7 @@ def get_input_filelist(variable, rootpath, drs):
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx':
-        if 'startdate' in variable:
+        if 'sub_experiment' in variable:
             variable = _update_output_file(variable, files)
         files = select_files(files, variable['start_year'],
                              variable['end_year'])
@@ -288,7 +288,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx' and 'startdate' not in variable:
+    if variable['frequency'] != 'fx' and 'sub_experiment' not in variable:
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index df81be33b0..2df191bd8b 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1075,19 +1075,19 @@ def _initialize_variables(self, raw_variable, raw_datasets):
                 activity = get_activity(variable)
                 if activity:
                     variable['activity'] = activity
-            if 'startdate' in variable:
-                startdate_keys = deepcopy(required_keys)
-                startdate_keys.update({'startdate'})
+            if 'sub_experiment' in variable:
+                subexperiment_keys = deepcopy(required_keys)
+                subexperiment_keys.update({'sub_experiment'})
                 try:
-                    startdate_keys.remove('start_year')
-                    startdate_keys.remove('end_year')
+                    subexperiment_keys.remove('start_year')
+                    subexperiment_keys.remove('end_year')
                 except KeyError:
                     continue
-                check.variable(variable, startdate_keys)
+                check.variable(variable, subexperiment_keys)
             else:
                 check.variable(variable, required_keys)
         variables = self._expand_tag(variables, 'ensemble')
-        variables = self._expand_tag(variables, 'startdate')
+        variables = self._expand_tag(variables, 'sub_experiment')
         return variables
 
     def _initialize_preprocessor_output(self, diagnostic_name, raw_variables,
diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index e5111e42bc..9b9b47f8eb 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames):
             f"Missing data for {var['alias']}: {var['short_name']}")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx' or 'startdate' in var:
+    if var['frequency'] == 'fx' or 'sub_experiment' in var:
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))
diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py
index febd0b47fe..097cc7b328 100644
--- a/tests/unit/data_finder/test_replace_tags.py
+++ b/tests/unit/data_finder/test_replace_tags.py
@@ -41,9 +41,9 @@ def test_replace_tags_list_of_str():
                        ])
 
 
-def test_replace_tags_with_startdate():
+def test_replace_tags_with_subexperiment():
     """Tests for get_start_end_year function."""
-    variable = {'startdate': '199411', **VARIABLE}
+    variable = {'sub_experiment': '199411', **VARIABLE}
     path = _replace_tags(
         '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/'
         '{grid}/{latestversion}', variable)
diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py
index dc64705473..bca2e0255b 100644
--- a/tests/unit/test_recipe.py
+++ b/tests/unit/test_recipe.py
@@ -29,18 +29,18 @@ def test_expand_ensemble(self):
         for i, ensemble in enumerate(ensembles):
             assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble}
 
-    def test_expand_startdate(self):
+    def test_expand_subexperiment(self):
 
         datasets = [
             {
                 'dataset': 'XYZ',
-                'startdate': 's(1998:2005)',
+                'sub_experiment': 's(1998:2005)',
             },
         ]
 
-        expanded = Recipe._expand_tag(datasets, 'startdate')
+        expanded = Recipe._expand_tag(datasets, 'sub_experiment')
 
-        startdates = [
+        subexperiments = [
             's1998',
             's1999',
             's2000',
@@ -50,8 +50,9 @@ def test_expand_startdate(self):
             's2004',
             's2005',
         ]
-        for i, startdate in enumerate(startdates):
-            assert expanded[i] == {'dataset': 'XYZ', 'startdate': startdate}
+        for i, subexperiment in enumerate(subexperiments):
+            assert expanded[i] == {'dataset': 'XYZ',
+                                   'sub_experiment': subexperiment}
 
     def test_expand_ensemble_nolist(self):
 

From 4296a74d6e3b93bae3a7b719e844a732002b89f2 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 3 May 2021 12:17:56 +0200
Subject: [PATCH 12/20] Add documentation

---
 doc/recipe/overview.rst | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst
index 7798878620..6080f9c2a4 100644
--- a/doc/recipe/overview.rst
+++ b/doc/recipe/overview.rst
@@ -82,10 +82,12 @@ data specifications:
   ``RCP8.5``)
 - mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``)
 - ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``)
+- sub experiment (key `sub_experiment`, value e.g. `s2000`, `s(2000:2010)`, 
+  for DCPP data only)
 - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please
   note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers,
   so we recommend to avoid them. For example, use ``128`` to specify the year
-  128 instead of ``0128``.)
+  128 instead of ``0128``. The time range is not needed for DCPP data.)
 - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for
   CMIP6 data only).
 
@@ -97,6 +99,7 @@ For example, a datasets section could be:
       - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004}
       - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn}
       - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn}
+      - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn}
 
 It is possible to define the experiment as a list to concatenate two experiments.
 Here it is an example concatenating the `historical` experiment with `rcp85`
@@ -114,7 +117,7 @@ In this case, the specified datasets are concatenated into a single cube:
     datasets:
       - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
 
-ESMValTool also supports a simplified syntax to add multiple ensemble members from the same dataset.
+ESMValTool also supports a simplified syntax to add multiple ensemble members and sub experiment ids from the same dataset.
 In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive),
 adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1
 you can use the following abbreviated syntax:
@@ -123,6 +126,7 @@ you can use the following abbreviated syntax:
 
     datasets:
       - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: "r(1:10)i1p1", start_year: 2001, end_year: 2004}
+      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn}
 
 It can be included multiple times in one definition. For example, to generate the datasets definitions
 for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i1p1 you can use:

From af7c285c46a3a6cf3bf96419232ed0500b9305dc Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 10 May 2021 17:30:42 +0200
Subject: [PATCH 13/20] Allow to load subexps per timerange or as a whole

---
 doc/recipe/overview.rst      | 18 ++++++++++++++----
 esmvalcore/_data_finder.py   |  4 ++--
 esmvalcore/_recipe.py        |  9 ++++-----
 esmvalcore/recipe_schema.yml |  1 +
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst
index 6080f9c2a4..fa71b7ccf4 100644
--- a/doc/recipe/overview.rst
+++ b/doc/recipe/overview.rst
@@ -87,7 +87,7 @@ data specifications:
 - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please
   note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers,
   so we recommend to avoid them. For example, use ``128`` to specify the year
-  128 instead of ``0128``. The time range is not needed for DCPP data.)
+  128 instead of ``0128``.)
 - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for
   CMIP6 data only).
 
@@ -99,7 +99,7 @@ For example, a datasets section could be:
       - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004}
       - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn}
       - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn}
-      - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn}
+      - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year, 2002}
 
 It is possible to define the experiment as a list to concatenate two experiments.
 Here it is an example concatenating the `historical` experiment with `rcp85`
@@ -117,9 +117,9 @@ In this case, the specified datasets are concatenated into a single cube:
     datasets:
       - {dataset: CanESM2, project: CMIP5, exp: [historical, rcp85], ensemble: [r1i1p1, r1i2p1], start_year: 2001, end_year: 2004}
 
-ESMValTool also supports a simplified syntax to add multiple ensemble members and sub experiment ids from the same dataset.
+ESMValTool also supports a simplified syntax to add multiple ensemble members.
 In the ensemble key, any element in the form `(x:y)` will be replaced with all numbers from x to y (both inclusive),
-adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1
+adding a dataset entry for each replacement. For example, to add ensemble members r1i1p1 to r10i1p1 
 you can use the following abbreviated syntax:
 
 .. code-block:: yaml
@@ -140,6 +140,16 @@ Please, bear in mind that this syntax can only be used in the ensemble tag.
 Also, note that the combination of multiple experiments and ensembles, like
 exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error.
 
+The same simplified syntax can be used to add multiple sub-experiment ids in combination with the tag `all_years: True`.
+This configuration will load all the available years for the sub-experiment, without having to specify 
+the `start_year` and `end_year` for each one of the ids:
+
+.. code-block:: yaml
+
+    datasets:
+      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn, all_years: True}
+
+
 Note that this section is not required, as datasets can also be provided in the
 Diagnostics_ section.
 
diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 7bc6bc7a80..ebd16b9973 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -266,7 +266,7 @@ def get_input_filelist(variable, rootpath, drs):
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx':
-        if 'sub_experiment' in variable:
+        if 'all_years' in variable:
             variable = _update_output_file(variable, files)
         files = select_files(files, variable['start_year'],
                              variable['end_year'])
@@ -288,7 +288,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx' and 'sub_experiment' not in variable:
+    if variable['frequency'] != 'fx' and 'all_years' not in variable:
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 2df191bd8b..71ac3da6f8 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1078,11 +1078,10 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             if 'sub_experiment' in variable:
                 subexperiment_keys = deepcopy(required_keys)
                 subexperiment_keys.update({'sub_experiment'})
-                try:
-                    subexperiment_keys.remove('start_year')
-                    subexperiment_keys.remove('end_year')
-                except KeyError:
-                    continue
+                if 'all_years' in variable:
+                    if variable['all_years']:
+                        subexperiment_keys.discard('start_year')
+                        subexperiment_keys.discard('end_year')
                 check.variable(variable, subexperiment_keys)
             else:
                 check.variable(variable, required_keys)
diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml
index 266f8b9f63..a3d3255257 100644
--- a/esmvalcore/recipe_schema.yml
+++ b/esmvalcore/recipe_schema.yml
@@ -41,6 +41,7 @@ variable:
   alternative_dataset: str(required=False)
   fx_files: list(required=False)
   additional_datasets: list(include('dataset'), required=False)
+  all_years: bool(required=False)
 
 # TODO: add preprocessor item
 

From 68ff78306ad5cc64fd7486f0ef411a17a703e6ff Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Mon, 10 May 2021 18:46:01 +0200
Subject: [PATCH 14/20] Fix condition

---
 esmvalcore/_recipe_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index 9b9b47f8eb..81a539e9cd 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames):
             f"Missing data for {var['alias']}: {var['short_name']}")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx' or 'sub_experiment' in var:
+    if var['frequency'] == 'fx' or 'all_years' in var:
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))

From a303e852e8e4fe6921a1afbe8d5826d1b97dc173 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 10:58:31 +0200
Subject: [PATCH 15/20] Remove 'all_years' functionality

---
 doc/recipe/overview.rst      |  9 +++------
 esmvalcore/_data_finder.py   | 14 +-------------
 esmvalcore/_recipe.py        |  4 ----
 esmvalcore/_recipe_checks.py |  2 +-
 esmvalcore/recipe_schema.yml |  1 -
 5 files changed, 5 insertions(+), 25 deletions(-)

diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst
index fa71b7ccf4..f7afc1a0ae 100644
--- a/doc/recipe/overview.rst
+++ b/doc/recipe/overview.rst
@@ -82,7 +82,7 @@ data specifications:
   ``RCP8.5``)
 - mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``)
 - ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``)
-- sub experiment (key `sub_experiment`, value e.g. `s2000`, `s(2000:2010)`, 
+- sub-experiment id (key `sub_experiment`, value e.g. `s2000`, `s(2000:2002)`, 
   for DCPP data only)
 - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please
   note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers,
@@ -126,7 +126,6 @@ you can use the following abbreviated syntax:
 
     datasets:
       - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: "r(1:10)i1p1", start_year: 2001, end_year: 2004}
-      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn}
 
 It can be included multiple times in one definition. For example, to generate the datasets definitions
 for the ensemble members r1i1p1 to r5i1p1 and from r1i2p1 to r5i1p1 you can use:
@@ -140,14 +139,12 @@ Please, bear in mind that this syntax can only be used in the ensemble tag.
 Also, note that the combination of multiple experiments and ensembles, like
 exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error.
 
-The same simplified syntax can be used to add multiple sub-experiment ids in combination with the tag `all_years: True`.
-This configuration will load all the available years for the sub-experiment, without having to specify 
-the `start_year` and `end_year` for each one of the ids:
+The same simplified syntax can be used to add multiple sub-experiment ids:
 
 .. code-block:: yaml
 
     datasets:
-      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2010), grid: gn, all_years: True}
+      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004}
 
 
 Note that this section is not required, as datasets can also be provided in the
diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index ebd16b9973..a756279097 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -236,16 +236,6 @@ def _get_filenames_glob(variable, drs):
     return filenames_glob
 
 
-def _update_output_file(variable, files):
-    intervals = [get_start_end_year(name) for name in files]
-    variable.update({'start_year': min(intervals)[0]})
-    variable.update({'end_year': max(intervals)[1]})
-    filename = variable['filename'].replace(
-        '.nc', '_{start_year}-{end_year}.nc'.format(**variable)
-    )
-    variable['filename'] = filename
-    return variable
-
 
 def _find_input_files(variable, rootpath, drs):
     short_name = variable['short_name']
@@ -266,8 +256,6 @@ def get_input_filelist(variable, rootpath, drs):
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx':
-        if 'all_years' in variable:
-            variable = _update_output_file(variable, files)
         files = select_files(files, variable['start_year'],
                              variable['end_year'])
     return (files, dirnames, filenames)
@@ -288,7 +276,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx' and 'all_years' not in variable:
+    if variable['frequency'] != 'fx' not in variable:
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index 71ac3da6f8..46e231038b 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1078,10 +1078,6 @@ def _initialize_variables(self, raw_variable, raw_datasets):
             if 'sub_experiment' in variable:
                 subexperiment_keys = deepcopy(required_keys)
                 subexperiment_keys.update({'sub_experiment'})
-                if 'all_years' in variable:
-                    if variable['all_years']:
-                        subexperiment_keys.discard('start_year')
-                        subexperiment_keys.discard('end_year')
                 check.variable(variable, subexperiment_keys)
             else:
                 check.variable(variable, required_keys)
diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index 81a539e9cd..cd41e0f648 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames):
             f"Missing data for {var['alias']}: {var['short_name']}")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx' or 'all_years' in var:
+    if var['frequency'] == 'fx' in var:
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))
diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml
index a3d3255257..266f8b9f63 100644
--- a/esmvalcore/recipe_schema.yml
+++ b/esmvalcore/recipe_schema.yml
@@ -41,7 +41,6 @@ variable:
   alternative_dataset: str(required=False)
   fx_files: list(required=False)
   additional_datasets: list(include('dataset'), required=False)
-  all_years: bool(required=False)
 
 # TODO: add preprocessor item
 

From 1737bcb33b2569bf28f140dc92d381e60b240dc3 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 11:26:19 +0200
Subject: [PATCH 16/20] Fix conditions

---
 esmvalcore/_data_finder.py   | 2 +-
 esmvalcore/_recipe_checks.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index a756279097..44f234c7c4 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -276,7 +276,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx' not in variable:
+    if variable['frequency'] != 'fx':
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index cd41e0f648..21cff6a4a9 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames):
             f"Missing data for {var['alias']}: {var['short_name']}")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx' in var:
+    if var['frequency'] == 'fx':
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))

From e0f40cbe37f689e66fb748f400ff1c93e4fa01b4 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 11:26:54 +0200
Subject: [PATCH 17/20] Fix flake

---
 esmvalcore/_data_finder.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 44f234c7c4..8c700b2210 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -236,7 +236,6 @@ def _get_filenames_glob(variable, drs):
     return filenames_glob
 
 
-
 def _find_input_files(variable, rootpath, drs):
     short_name = variable['short_name']
     variable['short_name'] = variable['original_short_name']

From 664e313e6d51e166eda7a3f13d90554d6f4b03db Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 14:31:40 +0200
Subject: [PATCH 18/20] Add all_years tag

---
 doc/recipe/overview.rst      |  8 ++++++--
 esmvalcore/_data_finder.py   | 14 +++++++++++++-
 esmvalcore/_recipe.py        |  4 ++++
 esmvalcore/_recipe_checks.py |  2 +-
 esmvalcore/recipe_schema.yml |  2 +-
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst
index f7afc1a0ae..71cce9eab7 100644
--- a/doc/recipe/overview.rst
+++ b/doc/recipe/overview.rst
@@ -90,6 +90,8 @@ data specifications:
   128 instead of ``0128``.)
 - model grid (native grid ``grid: gn`` or regridded grid ``grid: gr``, for
   CMIP6 data only).
+- load all years (key-value ``all_years: True`` loads every year available
+  for the dataset, so ``start_year`` and ``end_year`` can be omitted)
 
 For example, a datasets section could be:
 
@@ -97,6 +99,7 @@ For example, a datasets section could be:
 
     datasets:
       - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004}
+      - {dataset: ACCESS1-0, project: CMIP5, exp: historical, ensemble: r1i1p1, all_years: True}
       - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn}
       - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn}
       - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year, 2002}
@@ -139,12 +142,13 @@ Please, bear in mind that this syntax can only be used in the ensemble tag.
 Also, note that the combination of multiple experiments and ensembles, like
 exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error.
 
-The same simplified syntax can be used to add multiple sub-experiment ids:
+The same simplified syntax can be used to add multiple sub-experiment ids, optionally in combination with the ``all_years: True`` tag:
 
 .. code-block:: yaml
 
     datasets:
-      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004}
+      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: "s(2000:2002)", grid: gn, start_year: 2003, end_year: 2004}
+      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: "s(1980:1990)", grid: gn, all_years: True}
 
 
 Note that this section is not required, as datasets can also be provided in the
diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index 8c700b2210..b2022e30b2 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -236,6 +236,16 @@ def _get_filenames_glob(variable, drs):
     return filenames_glob
 
 
+def _update_output_file(variable, files):
+    intervals = [get_start_end_year(name) for name in files]
+    variable.update({'start_year': min(intervals)[0]})
+    variable.update({'end_year': max(intervals)[1]})
+    filename = variable['filename'].replace(
+        '.nc', '_{start_year}-{end_year}.nc'.format(**variable)
+    )
+    variable['filename'] = filename
+    return variable
+
 def _find_input_files(variable, rootpath, drs):
     short_name = variable['short_name']
     variable['short_name'] = variable['original_short_name']
@@ -255,6 +265,8 @@ def get_input_filelist(variable, rootpath, drs):
     (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs)
     # do time gating only for non-fx variables
     if variable['frequency'] != 'fx':
+        if 'all_years' in variable:
+            variable = _update_output_file(variable, files)
         files = select_files(files, variable['start_year'],
                              variable['end_year'])
     return (files, dirnames, filenames)
@@ -275,7 +287,7 @@ def get_output_file(variable, preproc_dir):
         variable['variable_group'],
         _replace_tags(cfg['output_file'], variable)[0],
     )
-    if variable['frequency'] != 'fx':
+    if variable['frequency'] != 'fx' and 'all_years' not in variable:
         outfile += '_{start_year}-{end_year}'.format(**variable)
     outfile += '.nc'
     return outfile
diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py
index bf536c84cc..52e6a51ba2 100644
--- a/esmvalcore/_recipe.py
+++ b/esmvalcore/_recipe.py
@@ -1105,6 +1105,10 @@ def _initialize_variables(self, raw_variable, raw_datasets):
                 activity = get_activity(variable)
                 if activity:
                     variable['activity'] = activity
+            if 'all_years' in variable:
+                if variable['all_years']:
+                    required_keys.discard('start_year')
+                    required_keys.discard('end_year')
             if 'sub_experiment' in variable:
                 subexperiment_keys = deepcopy(required_keys)
                 subexperiment_keys.update({'sub_experiment'})
diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py
index 21cff6a4a9..81a539e9cd 100644
--- a/esmvalcore/_recipe_checks.py
+++ b/esmvalcore/_recipe_checks.py
@@ -127,7 +127,7 @@ def data_availability(input_files, var, dirnames, filenames):
             f"Missing data for {var['alias']}: {var['short_name']}")
 
     # check time avail only for non-fx variables
-    if var['frequency'] == 'fx':
+    if var['frequency'] == 'fx' or 'all_years' in var:
         return
 
     required_years = set(range(var['start_year'], var['end_year'] + 1))
diff --git a/esmvalcore/recipe_schema.yml b/esmvalcore/recipe_schema.yml
index 266f8b9f63..186a4976ba 100644
--- a/esmvalcore/recipe_schema.yml
+++ b/esmvalcore/recipe_schema.yml
@@ -41,7 +41,7 @@ variable:
   alternative_dataset: str(required=False)
   fx_files: list(required=False)
   additional_datasets: list(include('dataset'), required=False)
-
+  all_years: bool(required=False)
 # TODO: add preprocessor item
 
 diagnostic:

From 3b01f8b50a9886ff1164fa6be5ceb64849cde223 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 14:32:06 +0200
Subject: [PATCH 19/20] Add_tests

---
 tests/integration/test_recipe.py | 54 ++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
index 81e3ef7720..14c4d36b63 100644
--- a/tests/integration/test_recipe.py
+++ b/tests/integration/test_recipe.py
@@ -632,6 +632,60 @@ def test_empty_variable(tmp_path, patched_datafinder, config_user):
     assert product.attributes['dataset'] == 'CanESM2'
 
 
+def test_all_years_tag(tmp_path, patched_datafinder, config_user):
+    """Test all_years tag for time-dependent variables."""
+    content = dedent("""
+        diagnostics:
+          diagnostic_name:
+            additional_datasets:
+              - dataset: CanESM2
+                project: CMIP5
+                mip: Amon
+                exp: historical
+                all_years: True
+                ensemble: r1i1p1
+            variables:
+              pr:
+            scripts: null
+        """)
+
+    recipe = get_recipe(tmp_path, content, config_user)
+    assert len(recipe.tasks) == 1
+    task = recipe.tasks.pop()
+    assert len(task.products) == 1
+    product = task.products.pop()
+    assert product.attributes['short_name'] == 'pr'
+    assert product.attributes['dataset'] == 'CanESM2'
+    assert '1990-2019' in product.filename
+
+
+def test_fx_all_years_tag(tmp_path, patched_datafinder, config_user):
+    """Test all_years tag does not break time-independent variables."""
+    content = dedent("""
+        diagnostics:
+          diagnostic_name:
+            additional_datasets:
+              - dataset: CanESM2
+                project: CMIP5
+                mip: fx
+                exp: historical
+                all_years: True
+                ensemble: r1i1p1
+            variables:
+              areacella:
+            scripts: null
+        """)
+
+    recipe = get_recipe(tmp_path, content, config_user)
+    assert len(recipe.tasks) == 1
+    task = recipe.tasks.pop()
+    assert len(task.products) == 1
+    product = task.products.pop()
+    assert product.attributes['short_name'] == 'pr'
+    assert product.attributes['dataset'] == 'CanESM2'
+    assert '1990-2019' not in product.filename
+
+
 def test_cmip3_variable_autocomplete(tmp_path, patched_datafinder,
                                      config_user):
     """Test that required information is automatically added for CMIP5."""

From 0f0ddf503cc7798c84ccfd16c6e8dad3ea895d40 Mon Sep 17 00:00:00 2001
From: sloosvel <saskia.loosveldt@bsc.es>
Date: Tue, 11 May 2021 14:43:32 +0200
Subject: [PATCH 20/20] Fix tests

---
 esmvalcore/_data_finder.py       | 1 +
 tests/integration/test_recipe.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py
index b2022e30b2..ebd16b9973 100644
--- a/esmvalcore/_data_finder.py
+++ b/esmvalcore/_data_finder.py
@@ -246,6 +246,7 @@ def _update_output_file(variable, files):
     variable['filename'] = filename
     return variable
 
+
 def _find_input_files(variable, rootpath, drs):
     short_name = variable['short_name']
     variable['short_name'] = variable['original_short_name']
diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py
index 14c4d36b63..4efc58c042 100644
--- a/tests/integration/test_recipe.py
+++ b/tests/integration/test_recipe.py
@@ -681,7 +681,7 @@ def test_fx_all_years_tag(tmp_path, patched_datafinder, config_user):
     task = recipe.tasks.pop()
     assert len(task.products) == 1
     product = task.products.pop()
-    assert product.attributes['short_name'] == 'pr'
+    assert product.attributes['short_name'] == 'areacella'
     assert product.attributes['dataset'] == 'CanESM2'
     assert '1990-2019' not in product.filename