diff --git a/doc/api/esmvalcore.local.rst b/doc/api/esmvalcore.local.rst new file mode 100644 index 0000000000..12326929fa --- /dev/null +++ b/doc/api/esmvalcore.local.rst @@ -0,0 +1,5 @@ +Find files on the local filesystem +================================== + +.. automodule:: esmvalcore.local + :no-inherited-members: diff --git a/doc/api/esmvalcore.rst b/doc/api/esmvalcore.rst index 23787500da..fe61d3bfbc 100644 --- a/doc/api/esmvalcore.rst +++ b/doc/api/esmvalcore.rst @@ -14,5 +14,7 @@ library. This section documents the public API of ESMValCore. esmvalcore.esgf esmvalcore.exceptions esmvalcore.iris_helpers + esmvalcore.local esmvalcore.preprocessor + esmvalcore.typing esmvalcore.experimental diff --git a/doc/api/esmvalcore.typing.rst b/doc/api/esmvalcore.typing.rst new file mode 100644 index 0000000000..f0f45e7469 --- /dev/null +++ b/doc/api/esmvalcore.typing.rst @@ -0,0 +1,6 @@ +Type hints +========== + +.. automodule:: esmvalcore.typing + :no-inherited-members: + :no-special-members: diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst index d097327e65..8902fb5dbf 100644 --- a/doc/develop/fixing_data.rst +++ b/doc/develop/fixing_data.rst @@ -377,7 +377,7 @@ To allow ESMValCore to locate the data files, use the following steps: native6: ... input_dir: - default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + default: 'Tier{tier}/{dataset}/{version}/{frequency}/{short_name}' MY_DATA_ORG: '{dataset}/{exp}/{simulation}/{version}/{type}' input_file: default: '*.nc' diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index 810e7484c8..5eaac1b8d2 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -438,8 +438,8 @@ Example of the CMIP6 project configuration: CMIP6: input_dir: default: '/' - BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' - DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' + DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/' input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc' output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' @@ -462,7 +462,7 @@ at each site. As an example, the CMIP6 directory path on BADC would be: .. code-block:: yaml - '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' The resulting directory path would look something like this: @@ -475,8 +475,8 @@ which may be needed: .. 
code-block:: yaml - - '{exp}/{ensemble}/original/{mip}/{short_name}/{grid}/{latestversion}' - - '{exp}/{ensemble}/computed/{mip}/{short_name}/{grid}/{latestversion}' + - '{exp}/{ensemble}/original/{mip}/{short_name}/{grid}/{version}' + - '{exp}/{ensemble}/computed/{mip}/{short_name}/{grid}/{version}' In that case, the resultant directories will be: @@ -629,7 +629,7 @@ Example: native6: cmor_strict: false input_dir: - default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + default: 'Tier{tier}/{dataset}/{version}/{frequency}/{short_name}' input_file: default: '*.nc' output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index 347b5265e3..6802427ba6 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -33,16 +33,16 @@ ensures that files and paths to them are named according to a standardized convention. Examples of this convention, also used by ESMValTool for file discovery and data retrieval, include: -* CMIP6 file: ``[variable_short_name]_[mip]_[dataset_name]_[experiment]_[ensemble]_[grid]_[start-date]-[end-date].nc`` -* CMIP5 file: ``[variable_short_name]_[mip]_[dataset_name]_[experiment]_[ensemble]_[start-date]-[end-date].nc`` -* OBS file: ``[project]_[dataset_name]_[type]_[version]_[mip]_[short_name]_[start-date]-[end-date].nc`` +* CMIP6 file: ``{variable_short_name}_{mip}_{dataset_name}_{experiment}_{ensemble}_{grid}_{start-date}-{end-date}.nc`` +* CMIP5 file: ``{variable_short_name}_{mip}_{dataset_name}_{experiment}_{ensemble}_{start-date}-{end-date}.nc`` +* OBS file: ``{project}_{dataset_name}_{type}_{version}_{mip}_{short_name}_{start-date}-{end-date}.nc`` Similar standards exist for the standard paths (input directories); for the ESGF data nodes, these paths differ slightly, for example: -* CMIP6 path for BADC: ``ROOT-BADC/[institute]/[dataset_name]/[experiment]/[ensemble]/[mip]/ - [variable_short_name]/[grid]``; -* CMIP6 path for ETHZ: ``ROOT-ETHZ/[experiment]/[mip]/[variable_short_name]/[dataset_name]/[ensemble]/[grid]`` +* CMIP6 path for BADC: ``ROOT-BADC/{institute}/{dataset_name}/{experiment}/{ensemble}/{mip}/ + {variable_short_name}/{grid}``; +* CMIP6 path for ETHZ: ``ROOT-ETHZ/{experiment}/{mip}/{variable_short_name}/{dataset_name}/{ensemble}/{grid}`` From the ESMValTool user perspective the number of data input parameters is optimized to allow for ease of use. We detail this procedure in the next @@ -130,7 +130,7 @@ MSWEP - Supported frequencies: ``mon``, ``day``, ``3hr``. - Tier: 3 -For example for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location. +For example for monthly data, place the files in the ``/Tier3/MSWEP/version/mon/pr`` subdirectory of your ``native6`` project location. .. note:: For monthly data (``V220``), the data must be postfixed with the date, i.e. 
rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc`` @@ -168,9 +168,9 @@ The default naming conventions for input directories and files for CESM are * input directories: 3 different types supported: * ``/`` (run directory) - * ``[case]/[gcomp]/hist`` (short-term archiving) - * ``[case]/[gcomp]/proc/[tdir]/[tperiod]`` (post-processed data) -* input files: ``[case].[scomp].[type].[string]*nc`` + * ``{case}/{gcomp}/hist`` (short-term archiving) + * ``{case}/{gcomp}/proc/{tdir}/{tperiod}`` (post-processed data) +* input files: ``{case}.{scomp}.{type}.{string}*nc`` as configured in the :ref:`config-developer file ` (using the default DRS ``drs: default`` in the :ref:`user configuration file`). @@ -179,12 +179,12 @@ More information about CESM naming conventions are given `here .. note:: - The ``[string]`` entry in the input file names above does not only + The ``{string}`` entry in the input file names above does not only correspond to the (optional) ``$string`` entry for `CESM model output files `__, but can also be used to read `post-processed files `__. - In the latter case, ``[string]`` corresponds to the combination + In the latter case, ``{string}`` corresponds to the combination ``$SSTRING.$TSTRING``. Thus, example dataset entries could look like this: @@ -244,8 +244,8 @@ model output. The default naming conventions for input directories and files for EMAC are -* input directories: ``[exp]/[channel]`` -* input files: ``[exp]*[channel][postproc_flag].nc`` +* input directories: ``{exp}/{channel}`` +* input files: ``{exp}*{channel}{postproc_flag}.nc`` as configured in the :ref:`config-developer file ` (using the default DRS ``drs: default`` in the :ref:`user configuration file`). @@ -313,8 +313,8 @@ ESMValTool is able to read native `ICON The default naming conventions for input directories and files for ICON are -* input directories: ``[exp]`` or ``{exp}/outdata`` -* input files: ``[exp]_[var_type]*.nc`` +* input directories: ``{exp}`` or ``{exp}/outdata`` +* input files: ``{exp}_{var_type}*.nc`` as configured in the :ref:`config-developer file ` (using the default DRS ``drs: default`` in the :ref:`user configuration file`). @@ -478,11 +478,11 @@ type of root paths they need the data from, e.g.: will tell the tool that the user needs data from a repository structured according to the BADC DRS structure, i.e.: -``ROOT/[institute]/[dataset_name]/[experiment]/[ensemble]/[mip]/[variable_short_name]/[grid]``; +``ROOT/{institute}/{dataset_name}/{experiment}/{ensemble}/{mip}/{variable_short_name}/{grid}``; setting the ``ROOT`` parameter is explained below. This is a strictly-structured repository tree and if there are any sort of irregularities -(e.g. there is no ``[mip]`` directory) the data will not be found! ``BADC`` can +(e.g. there is no ``{mip}`` directory) the data will not be found! ``BADC`` can be replaced with ``DKRZ`` or ``ETHZ`` depending on the existing ``ROOT`` directory structure. The snippet @@ -561,7 +561,7 @@ datasets are listed in any recipe, under either the ``datasets`` and/or - {dataset: HadGEM2-CC, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004} - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, grid: gn, start_year: 2004, end_year: 2014} -``_data_finder`` will use this information to find data for **all** the variables specified in ``diagnostics/variables``. 
+The data finding feature will use this information to find data for **all** the variables specified in ``diagnostics/variables``. Recap and example ================= diff --git a/esmvalcore/_provenance.py b/esmvalcore/_provenance.py index 2aa1ff1860..193a823c56 100644 --- a/esmvalcore/_provenance.py +++ b/esmvalcore/_provenance.py @@ -194,7 +194,7 @@ def _initialize_entity(self): for k, v in self.attributes.items() if k not in ('authors', 'projects') } - self.entity = self.provenance.entity('file:' + self.filename, + self.entity = self.provenance.entity(f'file:{self.filename}', attributes) attribute_to_authors(self.entity, self.attributes.get('authors', [])) diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 66c1285c99..86411a3e8a 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -16,17 +16,6 @@ from . import __version__ from . import _recipe_checks as check from . import esgf -from ._data_finder import ( - _find_input_files, - _get_timerange_from_years, - _parse_period, - _truncate_dates, - dates_to_timerange, - get_input_filelist, - get_multiproduct_filename, - get_output_file, - get_start_end_date, -) from ._provenance import TrackedFile, get_recipe_provenance from ._task import DiagnosticTask, ResumeTask, TaskSet from .cmor.check import CheckLevels @@ -39,6 +28,16 @@ ) from .config._diagnostics import TAGS from .exceptions import InputFilesNotFound, RecipeError +from .local import _dates_to_timerange as dates_to_timerange +from .local import _get_multiproduct_filename as get_multiproduct_filename +from .local import _get_output_file as get_output_file +from .local import _get_start_end_date as get_start_end_date +from .local import ( + _get_timerange_from_years, + _parse_period, + _truncate_dates, + find_files, +) from .preprocessor import ( DEFAULT_ORDER, FINAL_STEPS, @@ -225,7 +224,7 @@ def _augment(base, update): def _dataset_to_file(variable, config_user): """Find the first file belonging to dataset from variable info.""" - (files, dirnames, filenames) = _get_input_files(variable, config_user) + (files, globs) = _get_input_files(variable, config_user) if not files and variable.get('derive'): required_vars = get_required(variable['short_name'], variable['project']) @@ -233,12 +232,11 @@ def _dataset_to_file(variable, config_user): _augment(required_var, variable) _add_cmor_info(required_var, override=True) _add_extra_facets(required_var, config_user['extra_facets_dir']) - (files, dirnames, - filenames) = _get_input_files(required_var, config_user) + (files, globs) = _get_input_files(required_var, config_user) if files: variable = required_var break - check.data_availability(files, variable, dirnames, filenames) + check.data_availability(files, variable, globs) return files[0] @@ -584,10 +582,13 @@ def _get_input_files(variable, config_user): variable['start_year'] = start_year variable['end_year'] = end_year - (input_files, dirnames, - filenames) = get_input_filelist(variable=variable, - rootpath=config_user['rootpath'], - drs=config_user['drs']) + + variable = dict(variable) + if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': + variable['ensemble'] = 'r0i0p0' + if variable['frequency'] == 'fx': + variable.pop('timerange', None) + input_files, globs = find_files(debug=True, **variable) # Set up downloading from ESGF if requested. 
if (not config_user['offline'] @@ -596,8 +597,7 @@ def _get_input_files(variable, config_user): check.data_availability( input_files, variable, - dirnames, - filenames, + globs, log=False, ) except RecipeError: @@ -611,15 +611,14 @@ def _get_input_files(variable, config_user): DOWNLOAD_FILES.add(file) input_files.append(str(local_copy)) - dirnames.append('ESGF:') + globs.append('ESGF') - return (input_files, dirnames, filenames) + return (input_files, globs) def _get_ancestors(variable, config_user): """Get the input files for a single dataset and setup provenance.""" - (input_files, dirnames, - filenames) = _get_input_files(variable, config_user) + (input_files, globs) = _get_input_files(variable, config_user) logger.debug( "Using input files for variable %s of dataset %s:\n%s", @@ -629,7 +628,7 @@ def _get_ancestors(variable, config_user): f'{f} (will be downloaded)' if not os.path.exists(f) else str(f) for f in input_files), ) - check.data_availability(input_files, variable, dirnames, filenames) + check.data_availability(input_files, variable, globs) logger.info("Found input files for %s", variable['alias'].replace('_', ' ')) @@ -836,11 +835,10 @@ def _update_timerange(variable, config_user): check.valid_time_selection(timerange) if '*' in timerange: - (files, _, _) = _find_input_files( - variable, config_user['rootpath'], config_user['drs']) + facets = deepcopy(variable) + facets.pop('timerange', None) + files = find_files(**facets) if not files and not config_user.get('offline', True): - facets = deepcopy(variable) - facets.pop('timerange', None) files = [file.name for file in esgf.find_files(**facets)] if not files: @@ -928,6 +926,8 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, preproc_dir = config_user['preproc_dir'] for variable in variables: + if variable['frequency'] == 'fx': + variable.pop('timerange', None) _update_timerange(variable, config_user) variable['filename'] = get_output_file(variable, config_user['preproc_dir']) @@ -1094,7 +1094,7 @@ def _get_single_preprocessor_task(variables, logger.info("PreprocessingTask %s created.", task.name) logger.debug("PreprocessingTask %s will create the files:\n%s", task.name, - '\n'.join(p.filename for p in task.products)) + '\n'.join(str(p.filename) for p in task.products)) return task diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index a58dfd7e75..948319544b 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -1,5 +1,4 @@ """Module with functions to check a recipe.""" -import itertools import logging import os import re @@ -10,8 +9,8 @@ import isodate import yamale -from ._data_finder import get_start_end_year from .exceptions import InputFilesNotFound, RecipeError +from .local import _get_start_end_year from .preprocessor import TIME_PREPROCESSORS, PreprocessingTask from .preprocessor._multimodel import STATISTIC_MAPPING @@ -94,28 +93,18 @@ def variable(var, required_keys): missing, var.get('short_name'), var.get('diagnostic'))) -def _log_data_availability_errors(input_files, var, dirnames, filenames): +def _log_data_availability_errors(input_files, var, patterns): """Check if the required input data is available.""" var = dict(var) if not input_files: var.pop('filename', None) logger.error("No input files found for variable %s", var) - if dirnames and filenames: - patterns = itertools.product(dirnames, filenames) - patterns = [os.path.join(d, f) for (d, f) in patterns] + if patterns: if len(patterns) == 1: msg = f': {patterns[0]}' else: - msg = 
'\n{}'.format('\n'.join(patterns)) + msg = '\n{}'.format('\n'.join(str(p) for p in patterns)) logger.error("Looked for files matching%s", msg) - elif dirnames and not filenames: - logger.error( - "Looked for files in %s, but did not find any file pattern " - "to match against", dirnames) - elif filenames and not dirnames: - logger.error( - "Looked for files matching %s, but did not find any existing " - "input directory", filenames) logger.error("Set 'log_level' to 'debug' to get more information") @@ -145,10 +134,10 @@ def _group_years(years): return ", ".join(ranges) -def data_availability(input_files, var, dirnames, filenames, log=True): +def data_availability(input_files, var, patterns, log=True): """Check if input_files cover the required years.""" if log: - _log_data_availability_errors(input_files, var, dirnames, filenames) + _log_data_availability_errors(input_files, var, patterns) if not input_files: raise InputFilesNotFound( @@ -163,7 +152,7 @@ def data_availability(input_files, var, dirnames, filenames, log=True): available_years = set() for filename in input_files: - start, end = get_start_end_year(filename) + start, end = _get_start_end_year(filename) available_years.update(range(start, end + 1)) missing_years = required_years - available_years diff --git a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py index dd978b33af..d590daf7af 100644 --- a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py +++ b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py @@ -31,8 +31,8 @@ def fix_file(self, filepath, output_dir): However, we take care of ESMValTool policy re. dependencies licence """ - if "_" + self.extra_facets.get("group", - "non-sense") + ".nc" not in filepath: + if "_" + self.extra_facets.get( + "group", "non-sense") + ".nc" not in str(filepath): # No need to filter the file logger.debug("Not filtering for %s", filepath) return filepath @@ -47,11 +47,11 @@ def fix_file(self, filepath, output_dir): # Proceed with CDO selvar varname = self.extra_facets.get(VARNAME_KEY, self.vardef.short_name) - alt_filepath = filepath.replace(".nc", "_cdo_selected.nc") + alt_filepath = str(filepath).replace(".nc", "_cdo_selected.nc") outfile = self.get_fixed_filepath(output_dir, alt_filepath) tim1 = time.time() logger.debug("Using CDO for selecting %s in %s", varname, filepath) - command = ["cdo", "-selvar,%s" % varname, filepath, outfile] + command = ["cdo", "-selvar,%s" % varname, str(filepath), outfile] subprocess.run(command, check=True) logger.debug("CDO selection done in %.2f seconds", time.time() - tim1) return outfile diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index aec3c3df52..eaf00d67de 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -31,11 +31,11 @@ CMIP6: cmor_strict: true input_dir: default: '/' - BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' - DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' - ESGF: '{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' + DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' + ESGF: '{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/' - SYNDA: 
'{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' + SYNDA: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}' input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc' output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{grid}' cmor_type: 'CMIP6' @@ -44,15 +44,15 @@ CMIP5: cmor_strict: true input_dir: default: '/' - BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}/{short_name}' + BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}' BSC: '{type}/{project}/{exp}/{dataset.lower}' CP4CDS: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{short_name}/latest/' - DKRZ: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}/{short_name}' + DKRZ: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}/{short_name}' ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/' - ESGF: '{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}' + ESGF: '{project.lower}/{product}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}' RCAST: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/' SMHI: '{dataset}/{ensemble}/{exp}/{frequency}' - SYNDA: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}' + SYNDA: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{version}' input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc' output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' @@ -60,9 +60,9 @@ CMIP3: cmor_strict: true input_dir: default: '/' - BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{short_name}/{ensemble}/{latestversion}' + BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{short_name}/{ensemble}/{version}' DKRZ: '{exp}/{modeling_realm}/{frequency}/{short_name}/{dataset}/{ensemble}' - ESGF: '{project.lower}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{latestversion}' + ESGF: '{project.lower}/{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}' IPSL: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}/{short_name}' input_file: '{short_name}_*.nc' output_file: '{project}_{institute}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' @@ -95,7 +95,7 @@ OBS6: native6: cmor_strict: false input_dir: - default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + default: 'Tier{tier}/{dataset}/{version}/{frequency}/{short_name}' input_file: default: '*.nc' output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' @@ -106,7 +106,7 @@ obs4MIPs: cmor_strict: false input_dir: default: 'Tier{tier}/{dataset}' - ESGF: '{project}/{dataset}/{latestversion}' + ESGF: '{project}/{dataset}/{version}' RCAST: '/' IPSL: '{realm}/{short_name}/{freq}/{grid}/{institute}/{dataset}/{latest_version}' input_file: @@ -146,8 +146,8 @@ CORDEX: input_dir: default: '/' spec: '{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{mip}/{short_name}' - BADC: '{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{mip}/{short_name}/{latestversion}' - ESGF: 
'{project.lower}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{frequency}/{short_name}/{latestversion}' + BADC: '{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{mip}/{short_name}/{version}' + ESGF: '{project.lower}/output/{domain}/{institute}/{driver}/{exp}/{ensemble}/{dataset}/{rcm_version}/{frequency}/{short_name}/{version}' input_file: '{short_name}_{domain}_{driver}_{exp}_{ensemble}_{dataset}_{rcm_version}_{mip}*.nc' output_file: '{project}_{dataset}_{rcm_version}_{driver}_{domain}_{mip}_{exp}_{ensemble}_{short_name}' cmor_type: 'CMIP5' diff --git a/esmvalcore/config/_config.py b/esmvalcore/config/_config.py index ae7441197d..595a4ded00 100644 --- a/esmvalcore/config/_config.py +++ b/esmvalcore/config/_config.py @@ -97,7 +97,15 @@ def load_config_developer(cfg_file): cfg['obs4MIPs'] = cfg.pop('obs4mips') for project, settings in cfg.items(): + for site, drs in settings['input_dir'].items(): + # Since v2.8, 'version' can be used instead of 'latestversion' + if isinstance(drs, list): + drs = [d.replace('{latestversion}', '{version}') for d in drs] + else: + drs = drs.replace('{latestversion}', '{version}') + settings['input_dir'][site] = drs CFG[project] = settings + read_cmor_tables(cfg_file) diff --git a/esmvalcore/esgf/_download.py b/esmvalcore/esgf/_download.py index 6749c59aaa..ce4a0e8fc3 100644 --- a/esmvalcore/esgf/_download.py +++ b/esmvalcore/esgf/_download.py @@ -19,6 +19,7 @@ import yaml from humanfriendly import format_size, format_timespan +from ..local import LocalFile from ._logon import get_credentials from .facets import DATASET_MAP, FACETS @@ -337,14 +338,16 @@ def local_file(self, dest_folder): Returns ------- - Path + LocalFile The path where the file will be located after download. """ - return Path( + file = LocalFile( dest_folder, *self.dataset.split('.'), self.name, ).absolute() + file.facets = self.facets + return file def download(self, dest_folder): """Download the file. @@ -361,7 +364,7 @@ def download(self, dest_folder): Returns ------- - Path + LocalFile The path where the file will be located after download. """ local_file = self.local_file(dest_folder) diff --git a/esmvalcore/esgf/_search.py b/esmvalcore/esgf/_search.py index 7e3949596f..decbbc90fb 100644 --- a/esmvalcore/esgf/_search.py +++ b/esmvalcore/esgf/_search.py @@ -6,13 +6,13 @@ import pyesgf.search import requests.exceptions -from .._data_finder import ( +from ..config._esgf_pyclient import get_esgf_config +from ..local import ( + _get_start_end_date, _get_timerange_from_years, _parse_period, _truncate_dates, - get_start_end_date, ) -from ..config._esgf_pyclient import get_esgf_config from ._download import ESGFFile from .facets import DATASET_MAP, FACETS @@ -169,7 +169,7 @@ def select_by_time(files, timerange): for file in files: start_date, end_date = _parse_period(timerange) try: - start, end = get_start_end_date(file.name) + start, end = _get_start_end_date(file.name) except ValueError: # If start and end year cannot be read from the filename # just select everything. @@ -195,10 +195,21 @@ def find_files(*, project, short_name, dataset, **facets): dataset : str The name of the dataset. **facets : typing.Union[str, list[str]] - Any other search facets. The special value ``'*'`` will match anything. - If no ``version`` facet is specified, the function returns only the - latest version of each file, while other omitted facets will default - to ``'*'``. + Any other search facets. An ``'*'`` can be used to match + any value. 
By default, only the latest version of a file will + be returned. To select all versions use ``version='*'`` while other + omitted facets will default to ``'*'``. It is also + possible to specify multiple values for a facet, e.g. + ``exp=['historical', 'ssp585']`` will match any file that belongs + to either the historical or ssp585 experiment. + The ``timerange`` facet can be specified in `ISO 8601 format + `__. + + Note + ---- + A value of ``timerange='*'`` is supported, but combining a ``'*'`` with + a time or period :ref:`as supported in the recipe ` is currently + not supported and will return all found files. Examples -------- diff --git a/esmvalcore/_data_finder.py b/esmvalcore/local.py similarity index 63% rename from esmvalcore/_data_finder.py rename to esmvalcore/local.py index a2c4b90ff2..c9c3f1e5c4 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/local.py @@ -1,34 +1,25 @@ -"""Data finder module for the ESMValTool.""" -import glob +"""Find files on the local filesystem.""" +from __future__ import annotations + +import itertools import logging import os import re +from glob import glob from pathlib import Path +from typing import Any, Union import iris import isodate +from .config import CFG from .config._config import get_project_config from .exceptions import RecipeError +from .typing import Facets, FacetValue logger = logging.getLogger(__name__) -def find_files(dirnames, filenames): - """Find files matching filenames in dirnames.""" - logger.debug("Looking for files matching %s in %s", filenames, dirnames) - - result = [] - for dirname in dirnames: - for filename_pattern in filenames: - pat = os.path.join(dirname, filename_pattern) - files = glob.glob(pat) - files.sort() # sorting makes it easier to see what was found - result.extend(files) - - return result - - def _get_from_pattern(pattern, date_range_pattern, stem, group): """Get time, date or datetime from date range patterns in file names.""" # @@ -68,7 +59,7 @@ def _get_from_pattern(pattern, date_range_pattern, stem, group): return start_point, end_point -def get_start_end_date(filename): +def _get_start_end_date(filename): """Get the start and end dates as a string from a file name. Examples of allowed dates : 1980, 198001, 19801231, @@ -118,13 +109,13 @@ def get_start_end_date(filename): break if start_date is None or end_date is None: - raise ValueError(f'File {filename} dates do not match a recognized' + raise ValueError(f'File {filename} dates do not match a recognized ' 'pattern and time can not be read from the file') return start_date, end_date -def dates_to_timerange(start_date, end_date): +def _dates_to_timerange(start_date, end_date): """Convert ``start_date`` and ``end_date`` to ``timerange``. Note @@ -162,16 +153,16 @@ def _get_timerange_from_years(variable): start_year = variable.get('start_year') end_year = variable.get('end_year') if start_year and end_year: - variable['timerange'] = dates_to_timerange(start_year, end_year) + variable['timerange'] = _dates_to_timerange(start_year, end_year) elif start_year: - variable['timerange'] = dates_to_timerange(start_year, start_year) + variable['timerange'] = _dates_to_timerange(start_year, start_year) elif end_year: - variable['timerange'] = dates_to_timerange(end_year, end_year) + variable['timerange'] = _dates_to_timerange(end_year, end_year) variable.pop('start_year', None) variable.pop('end_year', None) -def get_start_end_year(filename): +def _get_start_end_year(filename): """Get the start and end year from a file name. 
Examples of allowed dates : 1980, 198001, 19801231, @@ -215,7 +206,7 @@ def get_start_end_year(filename): break if start_year is None or end_year is None: - raise ValueError(f'File {filename} dates do not match a recognized' + raise ValueError(f'File {filename} dates do not match a recognized ' 'pattern and time can not be read from the file') return int(start_year), int(end_year) @@ -293,7 +284,7 @@ def _truncate_dates(date, file_date): return int(date), int(file_date) -def select_files(filenames, timerange): +def _select_files(filenames, timerange): """Select files containing data between a given timerange. If the timerange is given as a period, the file selection @@ -302,11 +293,15 @@ def select_files(filenames, timerange): Otherwise, the file selection occurs taking into account the time resolution of the file. """ + if '*' in timerange: + # TODO: support * combined with a period + return filenames + selection = [] for filename in filenames: start_date, end_date = _parse_period(timerange) - start, end = get_start_end_date(filename) + start, end = _get_start_end_date(filename) start_date, start = _truncate_dates(start_date, start) end_date, end = _truncate_dates(end_date, end) @@ -317,37 +312,40 @@ def select_files(filenames, timerange): return selection -def _replace_tags(paths, variable): +def _replace_tags( + paths: Union[str, list[str]], + variable: Facets, +) -> list[Path]: """Replace tags in the config-developer's file with actual values.""" if isinstance(paths, str): - paths = set((paths.strip('/'), )) + pathset = set((paths.strip('/'), )) else: - paths = set(path.strip('/') for path in paths) - tlist = set() - for path in paths: + pathset = set(path.strip('/') for path in paths) + tlist: set[str] = set() + for path in pathset: tlist = tlist.union(re.findall(r'{([^}]*)}', path)) if 'sub_experiment' in variable: - new_paths = [] - for path in paths: - new_paths.extend( + new_paths: set[str] = set() + for path in pathset: + new_paths.update( (re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path), re.sub(r'({ensemble})', r'{sub_experiment}-\1', path))) tlist.add('sub_experiment') - paths = new_paths + pathset = new_paths for tag in tlist: original_tag = tag tag, _, _ = _get_caps_options(tag) - if tag == 'latestversion': # handled separately later - continue if tag in variable: replacewith = variable[tag] + elif tag == 'version': + replacewith = '*' else: raise RecipeError(f"Dataset key '{tag}' must be specified for " f"{variable}, check your recipe entry") - paths = _replace_tag(paths, original_tag, replacewith) - return paths + pathset = _replace_tag(pathset, original_tag, replacewith) + return [Path(p) for p in pathset] def _replace_tag(paths, tag, replacewith): @@ -383,37 +381,14 @@ def _apply_caps(original, lower, upper): return original -def _resolve_latestversion(dirname_template): - """Resolve the 'latestversion' tag. - - This implementation avoid globbing on centralized clusters with very - large data root dirs (i.e. ESGF nodes like Jasmin/DKRZ). 
- """ - if '{latestversion}' not in dirname_template: - return dirname_template - - # Find latest version - part1, part2 = dirname_template.split('{latestversion}') - part2 = part2.lstrip(os.sep) - if os.path.exists(part1): - versions = os.listdir(part1) - versions.sort(reverse=True) - for version in ['latest'] + versions: - dirname = os.path.join(part1, version, part2) - if os.path.isdir(dirname): - return dirname - - return None - - -def _select_drs(input_type, drs, project): +def _select_drs(input_type, project): """Select the directory structure of input path.""" cfg = get_project_config(project) input_path = cfg[input_type] if isinstance(input_path, str): return input_path - structure = drs.get(project, 'default') + structure = CFG['drs'].get(project, 'default') if structure in input_path: return input_path[structure] @@ -422,87 +397,61 @@ def _select_drs(input_type, drs, project): structure, project)) -ROOTPATH_WARNED = set() +_ROOTPATH_WARNED = set() -def get_rootpath(rootpath, project): +def _get_rootpath(project): """Select the rootpath.""" + rootpath = CFG['rootpath'] for key in (project, 'default'): if key in rootpath: nonexistent = tuple(p for p in rootpath[key] if not os.path.exists(p)) - if nonexistent and (key, nonexistent) not in ROOTPATH_WARNED: + if nonexistent and (key, nonexistent) not in _ROOTPATH_WARNED: logger.warning( "'%s' rootpaths '%s' set in config-user.yml do not exist", key, ', '.join(str(p) for p in nonexistent)) - ROOTPATH_WARNED.add((key, nonexistent)) + _ROOTPATH_WARNED.add((key, nonexistent)) return rootpath[key] raise KeyError('default rootpath must be specified in config-user file') -def _find_input_dirs(variable, rootpath, drs): - """Return a the full paths to input directories.""" +def _get_globs(variable): + """Compose the globs that will be used to look for files.""" project = variable['project'] - root = get_rootpath(rootpath, project) - path_template = _select_drs('input_dir', drs, project) + rootpaths = _get_rootpath(project) - dirnames = [] - for dirname_template in _replace_tags(path_template, variable): - for base_path in root: - dirname = os.path.join(base_path, dirname_template) - dirname = _resolve_latestversion(dirname) - if dirname is None: - continue - matches = glob.glob(dirname) - matches = [match for match in matches if os.path.isdir(match)] - if matches: - for match in matches: - dirnames.append(match) - else: - logger.debug("Skipping non-existent %s", dirname) + dirname_template = _select_drs('input_dir', project) + dirname_globs = _replace_tags(dirname_template, variable) - return dirnames + filename_template = _select_drs('input_file', project) + filename_globs = _replace_tags(filename_template, variable) + globs = sorted(r / d / f for r in rootpaths for d in dirname_globs + for f in filename_globs) + return globs -def _get_filenames_glob(variable, drs): - """Return patterns that can be used to look for input files.""" - path_template = _select_drs('input_file', drs, variable['project']) - filenames_glob = _replace_tags(path_template, variable) - return filenames_glob +def _get_input_filelist(variable): + """Return the full path to input files.""" + variable = dict(variable) + if 'original_short_name' in variable: + variable['short_name'] = variable['original_short_name'] -def _find_input_files(variable, rootpath, drs): - """Find available input files. + globs = _get_globs(variable) + logger.debug("Looking for files matching %s", globs) - Return the files, the directory in which they are located in, and - the file name. 
- """ - short_name = variable['short_name'] - variable['short_name'] = variable['original_short_name'] - input_dirs = _find_input_dirs(variable, rootpath, drs) - filenames_glob = _get_filenames_glob(variable, drs) - files = find_files(input_dirs, filenames_glob) - variable['short_name'] = short_name - return (files, input_dirs, filenames_glob) + files = list(Path(file) for glob_ in globs for file in glob(str(glob_))) + files.sort() # sorting makes it easier to see what was found + if 'timerange' in variable: + files = _select_files(files, variable['timerange']) -def get_input_filelist(variable, rootpath, drs): - """Return the full path to input files.""" - # change ensemble to fixed r0i0p0 for fx variables - # this is needed and is not a duplicate effort - if variable['project'] == 'CMIP5' and variable['frequency'] == 'fx': - variable['ensemble'] = 'r0i0p0' - (files, dirnames, filenames) = _find_input_files(variable, rootpath, drs) - # do time gating only for non-fx variables - if variable['frequency'] != 'fx': - files = select_files( - files, - variable['timerange']) - return (files, dirnames, filenames) - - -def get_output_file(variable, preproc_dir): + return files, globs + + +def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: """Return the full path to the output (preprocessed) file.""" cfg = get_project_config(variable['project']) @@ -510,22 +459,20 @@ def get_output_file(variable, preproc_dir): if isinstance(variable.get('exp'), (list, tuple)): variable = dict(variable) variable['exp'] = '-'.join(variable['exp']) - - outfile = os.path.join( + outfile = _replace_tags(cfg['output_file'], variable)[0] + if 'timerange' in variable: + timerange = variable['timerange'].replace('/', '-') + outfile = Path(f'{outfile}_{timerange}') + outfile = Path(f"{outfile}.nc") + return Path( preproc_dir, - variable['diagnostic'], - variable['variable_group'], - _replace_tags(cfg['output_file'], variable)[0], + variable.get('diagnostic', ''), + variable.get('variable_group', ''), + outfile, ) - if variable['frequency'] != 'fx': - timerange = variable['timerange'].replace('/', '-') - outfile += f'_{timerange}' - - outfile += '.nc' - return outfile -def get_multiproduct_filename(attributes, preproc_dir): +def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: """Get ensemble/multi-model filename depending on settings.""" relevant_keys = [ 'project', 'dataset', 'exp', 'ensemble_statistics', @@ -547,7 +494,7 @@ def get_multiproduct_filename(attributes, preproc_dir): filename_segments.append( f"{attributes['timerange'].replace('/', '-')}.nc") - outfile = os.path.join( + outfile = Path( preproc_dir, attributes['diagnostic'], attributes['variable_group'], @@ -555,3 +502,162 @@ def get_multiproduct_filename(attributes, preproc_dir): ) return outfile + + +def _path2facets(path: Path, drs: str) -> dict[str, str]: + """Extract facets from a path using a DRS like '{facet1}/{facet2}'.""" + keys = [] + for key in re.findall(r"{(.*?)}", drs): + key = key.split('.')[0] # Remove trailing .lower and .upper + keys.append(key) + start, end = -len(keys) - 1, -1 + values = path.parts[start:end] + facets = {key: values[idx] for idx, key in enumerate(keys)} + return facets + + +def _filter_versions_called_latest( + files: list['LocalFile'], +) -> list['LocalFile']: + """Filter out versions called 'latest' if they are duplicates. + + On compute clusters it is usual to have a symbolic link to the + latest version called 'latest'. 
Those need to be skipped in order to + find valid version names and avoid duplicate results. + """ + resolved_valid_versions = { + f.resolve(strict=False) + for f in files if f.facets.get('version') != 'latest' + } + return [ + f for f in files if f.facets.get('version') != 'latest' or f.resolve( + strict=False) not in resolved_valid_versions + ] + + +def _select_latest_version(files: list['LocalFile']) -> list['LocalFile']: + """Select only the latest version of files.""" + + def filename(file): + return file.name + + def version(file): + return file.facets.get('version', '') + + result = [] + for _, group in itertools.groupby(sorted(files, key=filename), + key=filename): + duplicates = sorted(group, key=version) + latest = duplicates[-1] + result.append(latest) + return result + + +def find_files( + *, + debug: bool = False, + **facets: FacetValue, +) -> Union[list[LocalFile], tuple[list[LocalFile], list[Path]]]: + """Find files on the local filesystem. + + The directories that are searched for files are defined in + :data:`esmvalcore.config.CFG` under the ``'rootpath'`` key using the + directory structure defined under the ``'drs'`` key. + If ``esmvalcore.config.CFG['rootpath']`` contains a key that matches the + value of the ``project`` facet, those paths will be used. If there is no + project specific key, the directories in + ``esmvalcore.config.CFG['rootpath']['default']`` will be searched. + + See :ref:`findingdata` for extensive instructions on configuring ESMValCore + so it can find files locally. + + Parameters + ---------- + debug + When debug is set to :obj:`True`, the function will return a tuple + with the first element containing the files that were found + and the second element containing the :func:`glob.glob` patterns that + were used to search for files. + **facets + Facets used to search for files. An ``'*'`` can be used to match + any value. By default, only the latest version of a file will + be returned. To select all versions use ``version='*'``. It is also + possible to specify multiple values for a facet, e.g. + ``exp=['historical', 'ssp585']`` will match any file that belongs + to either the historical or ssp585 experiment. + The ``timerange`` facet can be specified in `ISO 8601 format + `__. + + Note + ---- + A value of ``timerange='*'`` is supported, but combining a ``'*'`` with + a time or period :ref:`as supported in the recipe ` is currently + not supported and will return all found files. + + Examples + -------- + Search for files containing surface air temperature from any CMIP6 model + for the historical experiment: + + >>> esmvalcore.local.find_files( + ... project='CMIP6', + ... activity='CMIP', + ... mip='Amon', + ... short_name='tas', + ... exp='historical', + ... dataset='*', + ... ensemble='*', + ... grid='*', + ... institute='*', + ... ) # doctest: +SKIP + [LocalFile('/home/bandela/climate_data/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gn/v20181214/tas_Amon_BCC-ESM1_historical_r1i1p1f1_gn_185001-201412.nc')] + + Returns + ------- + list[LocalFile] + The files that were found. 
+ """ # pylint: disable=line-too-long + filenames, globs = _get_input_filelist(facets) + drs = _select_drs('input_dir', facets['project']) + if isinstance(drs, list): + # Not sure how to handle a list of DRSs + drs = '' + files = [] + filter_latest = False + for filename in filenames: + file = LocalFile(filename) + file.facets.update(_path2facets(file, drs)) + if file.facets.get('version') == 'latest': + filter_latest = True + files.append(file) + + if filter_latest: + files = _filter_versions_called_latest(files) + + if 'version' not in facets: + files = _select_latest_version(files) + + if debug: + return files, globs + return files + + +class LocalFile(type(Path())): # type: ignore + """File on the local filesystem.""" + + @property + def facets(self) -> Facets: + """Facets describing the file. + + Note + ---- + When using :func:`find_files`, facets are read from the directory + structure. Facets stored in filenames are not yet supported. + """ + if not hasattr(self, '_facets'): + self._facets: Facets = {} + return self._facets + + @facets.setter + def facets(self, value: Facets): + self._facets = value diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 8f806ecebe..c7ca5a8818 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -2,6 +2,7 @@ import copy import inspect import logging +from pathlib import Path from pprint import pformat from iris.cube import Cube @@ -325,7 +326,7 @@ def _run_preproc_function(function, items, kwargs, input_files=None): f"here; refer to the debug log for a full list)") # Make sure that the arguments are indexable - if isinstance(items, (PreprocessorFile, Cube, str)): + if isinstance(items, (PreprocessorFile, Cube, str, Path)): items = [items] if isinstance(items, set): items = list(items) @@ -361,7 +362,7 @@ def preprocess(items, step, input_files=None, **settings): items = [] for item in result: - if isinstance(item, (PreprocessorFile, Cube, str)): + if isinstance(item, (PreprocessorFile, Cube, str, Path)): items.append(item) else: items.extend(item) diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py index 482f2c8803..dd77e6b946 100644 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -1,6 +1,7 @@ """Preprocessor functions for ancillary variables and cell measures.""" import logging +from pathlib import Path import dask.array as da import iris @@ -158,7 +159,7 @@ def add_fx_variables(cube, fx_variables, check_level): for fx_info in fx_variables.values(): if not fx_info: continue - if isinstance(fx_info['filename'], str): + if isinstance(fx_info['filename'], (str, Path)): fx_info['filename'] = [fx_info['filename']] fx_cube = _load_fx(cube, fx_info, check_level) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index fff3548539..3690863c48 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -135,6 +135,7 @@ def load(file, callback=None, ignore_warnings=None): ValueError Cubes are empty. 
""" + file = str(file) logger.debug("Loading:\n%s", file) if ignore_warnings is None: ignore_warnings = [] diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index eabbfd853e..e4edbce9fe 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -7,6 +7,7 @@ import re from copy import deepcopy from decimal import Decimal +from pathlib import Path from typing import Dict import iris @@ -548,7 +549,7 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True): reference: esmf_regrid.schemes:ESMFAreaWeighted """ - if isinstance(target_grid, str): + if isinstance(target_grid, (str, Path)): if os.path.isfile(target_grid): target_grid = iris.load_cube(target_grid) else: diff --git a/esmvalcore/typing.py b/esmvalcore/typing.py new file mode 100644 index 0000000000..a217bf46ee --- /dev/null +++ b/esmvalcore/typing.py @@ -0,0 +1,11 @@ +"""Type aliases for providing type hints.""" +from __future__ import annotations + +from numbers import Number +from typing import Dict, Sequence, Union + +FacetValue = Union[str, Sequence[str], Number] +"""Type describing a single facet.""" + +Facets = Dict[str, FacetValue] +"""Type describing a collection of facets.""" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 3bdd9a9c42..d40e0dbea0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,9 +1,10 @@ import os +from pathlib import Path import iris import pytest -from esmvalcore import _data_finder +import esmvalcore.local from esmvalcore.config import CFG, _config from esmvalcore.config._config_object import CFG_DEFAULT @@ -15,9 +16,9 @@ def session(tmp_path, monkeypatch): session.update(CFG_DEFAULT) session['output_dir'] = tmp_path / 'esmvaltool_output' - # The patched_data_finder fixture does not return the correct input + # The patched_datafinder fixture does not return the correct input # directory structure, so make sure it is set to flat for every project - session['drs'] = {} + monkeypatch.setitem(CFG, 'drs', {}) for project in _config.CFG: monkeypatch.setitem(_config.CFG[project]['input_dir'], 'default', '/') return session @@ -36,8 +37,8 @@ def create_test_file(filename, tracking_id=None): iris.save(cube, filename) -def _get_filenames(root_path, filenames, tracking_id): - filename = filenames[0] +def _get_filenames(root_path, filename, tracking_id): + filename = Path(filename).name filename = str(root_path / 'input' / filename) filenames = [] if filename.endswith('[_.]*nc'): @@ -71,14 +72,10 @@ def tracking_ids(i=0): tracking_id = tracking_ids() - def find_files(_, filenames): - # Any occurrence of [something] in filename should have - # been replaced before this function is called. - for filename in filenames: - assert '{' not in filename - return _get_filenames(tmp_path, filenames, tracking_id) + def glob(file_glob): + return _get_filenames(tmp_path, file_glob, tracking_id) - monkeypatch.setattr(_data_finder, 'find_files', find_files) + monkeypatch.setattr(esmvalcore.local, 'glob', glob) @pytest.fixture @@ -91,22 +88,16 @@ def tracking_ids(i=0): tracking_id = tracking_ids() - def find_files(_, filenames): - # Any occurrence of [something] in filename should have - # been replaced before this function is called. 
- for filename in filenames: - assert '{' not in filename - + def glob(filename): # Fail for specified fx variables - for filename in filenames: - if 'fx_' in filename: - return [] - if 'sftlf' in filename: - return [] - if 'IyrAnt_' in filename: - return [] - if 'IyrGre_' in filename: - return [] - return _get_filenames(tmp_path, filenames, tracking_id) - - monkeypatch.setattr(_data_finder, 'find_files', find_files) + if 'fx_' in filename: + return [] + if 'sftlf' in filename: + return [] + if 'IyrAnt_' in filename: + return [] + if 'IyrGre_' in filename: + return [] + return _get_filenames(tmp_path, filename, tracking_id) + + monkeypatch.setattr(esmvalcore.local, 'glob', glob) diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index 74b50980d5..7179014173 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -36,6 +36,7 @@ get_output_file: frequency: mon mip: Amon exp: amip + channel: Amon timerange: '1960/1980' diagnostic: test_diag preprocessor: test_preproc @@ -70,6 +71,7 @@ get_output_file: frequency: mon mip: Amon exp: amip + var_type: atm_2d_ml timerange: '1960/1980' diagnostic: test_diag preprocessor: test_preproc @@ -104,6 +106,8 @@ get_output_file: mip: Amon exp: amip case: f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1 + gcomp: atm + scomp: cam type: h0 timerange: '2000/2002' diagnostic: test_diag @@ -275,7 +279,8 @@ get_input_filelist: - drs: default variable: *variable - dirs: null + dirs: + - '' file_patterns: - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: [] @@ -293,13 +298,48 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc dirs: - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta file_patterns: - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - drs: BADC + variable: + <<: *variable + timerange: '2000/2005' + version: v20110329 + available_files: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + dirs: + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc + found_files: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + + - drs: BADC + variable: + <<: *variable + ensemble: '*' + timerange: '2000/2005' + available_files: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110329/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r2i1p1/v20110329/ta/ta_Amon_HadGEM2-ES_historical_r2i1p1_198412-200511.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - 
MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r2i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r2i1p1_198412-200511.nc + dirs: + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/*/*/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/*/*/ta + file_patterns: + - ta_Amon_HadGEM2-ES_historical_**.nc + found_files: + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r2i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r2i1p1_198412-200511.nc - drs: BADC variable: @@ -316,12 +356,13 @@ get_input_filelist: - link_name: MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest target: v20120928 dirs: - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta file_patterns: - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/latest/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20120928/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - drs: DKRZ variable: @@ -335,7 +376,8 @@ get_input_filelist: - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_195912-198411.nc - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc dirs: - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta file_patterns: - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc found_files: @@ -357,9 +399,12 @@ get_input_filelist: - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp45_r1i1p1_200601-210012.nc - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta/ta_Amon_HadGEM2-ES_rcp85_r1i1p1_200601-210012.nc dirs: - - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/v20110330/ta - - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/v20110330/ta - - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/v20110330/ta + - INPE/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta + - INPE/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/*/ta + - INPE/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/historical/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/rcp45/mon/atmos/Amon/r1i1p1/*/ta + - MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/*/ta file_patterns: - ta_Amon_HadGEM2-ES_historical_r1i1p1*.nc - ta_Amon_HadGEM2-ES_rcp45_r1i1p1*.nc @@ -434,7 +479,8 @@ get_input_filelist: - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_195001-199912.nc - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_200001-201412.nc dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/*/ + - CMIP/NERC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/*/ file_patterns: - ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc found_files: @@ -591,7 +637,7 @@ get_input_filelist: modeling_realm: [atmos] mip: fx exp: historical - ensemble: r1i1p1 
+ ensemble: r0i0p0 diagnostic: test_diag preprocessor: test_preproc available_files: @@ -616,14 +662,15 @@ get_input_filelist: modeling_realm: [atmos] mip: fx exp: historical - ensemble: r1i1p1 + ensemble: r0i0p0 diagnostic: test_diag preprocessor: test_preproc available_files: - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc dirs: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf + - INPE/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/*/sftlf + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/*/sftlf file_patterns: - sftlf_fx_HadGEM2-ES_historical_r0i0p0*.nc found_files: @@ -641,13 +688,15 @@ get_input_filelist: modeling_realm: [atmos] mip: fx exp: historical - ensemble: r1i1p1 + ensemble: r0i0p0 diagnostic: test_diag preprocessor: test_preproc available_files: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - dirs: [] + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc + dirs: + - INPE/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/*/orog/ + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/*/orog/ file_patterns: - orog_fx_HadGEM2-ES_historical_r0i0p0*.nc found_files: [] @@ -674,7 +723,8 @@ get_input_filelist: - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/ + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/*/ + - CMIP/NERC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/*/ file_patterns: - areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc found_files: @@ -703,7 +753,8 @@ get_input_filelist: - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/ + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/*/ + - CMIP/NERC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/*/ file_patterns: - areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc found_files: @@ -732,7 +783,8 @@ get_input_filelist: - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/this_is_a_wrong_file.nc - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/ + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/*/ + - CMIP/NERC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/*/ file_patterns: - volcello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc found_files: [] @@ -757,7 +809,9 @@ get_input_filelist: preprocessor: test_preproc available_files: - 
CMIP/MOHC/HadGEM3-GC31-LL/historical/r0i0p0/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r0i0p0_gn.nc - dirs: [] + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/volcello/gn/*/ + - CMIP/NERC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/volcello/gn/*/ file_patterns: - volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc found_files: [] @@ -775,7 +829,6 @@ get_input_filelist: mip: fx exp: historical ensemble: r1i1p1 - timerange: '1999/2000' diagnostic: test_diag preprocessor: test_preproc available_files: @@ -868,6 +921,30 @@ get_input_filelist: - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc found_files: [] + - drs: default + variable: + short_name: tas + dataset: ERA5 + project: native6 + frequency: mon + mip: Amon + tier: 3 + type: reanaly + timerange: '2000/2010' + available_files: + - Tier3/ERA5/1/mon/tas/era5_2m_temperature_2000_monthly.nc + - Tier3/ERA5/1/mon/tas/era5_2m_temperature_2001_monthly.nc + dirs: + - Tier3/ERA5/*/mon/tas + file_patterns: + - '*.nc' + found_files: + - Tier3/ERA5/1/mon/tas/era5_2m_temperature_2000_monthly.nc + - Tier3/ERA5/1/mon/tas/era5_2m_temperature_2001_monthly.nc + available_symlinks: + - link_name: Tier3/ERA5/latest + target: '1' + # EMAC - drs: default @@ -880,6 +957,8 @@ get_input_filelist: frequency: mon mip: Amon exp: amip + channel: Amon + postproc_flag: '' timerange: '200002/200003' diagnostic: test_diag preprocessor: test_preproc @@ -952,6 +1031,7 @@ get_input_filelist: frequency: mon mip: Amon exp: amip + var_type: atm_2d_ml timerange: '200002/200003' diagnostic: test_diag preprocessor: test_preproc @@ -1014,7 +1094,12 @@ get_input_filelist: frequency: mon mip: Amon case: f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1 + gcomp: atm + scomp: cam type: h0 + tdir: '' + tperiod: '' + string: '' timerange: '2000/2002' diagnostic: test_diag preprocessor: test_preproc @@ -1024,6 +1109,7 @@ get_input_filelist: - f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1/atm/hist/f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1.cam.h0.2002.nc dirs: - '' + - f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1/atm/proc - f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1/atm/hist file_patterns: - f.e21.FHIST_BGC.f09_f09_mg17.CMIP6-AMIP.001_cosp1.cam.h0.*nc diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py deleted file mode 100644 index 7e74266f45..0000000000 --- a/tests/integration/test_data_finder.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Tests for _data_finder.py.""" -import os -import shutil -import tempfile - -import pytest -import yaml - -import esmvalcore.config -from esmvalcore._data_finder import ( - _find_input_files, - get_input_filelist, - get_output_file, -) - -# Load test configuration -with open(os.path.join(os.path.dirname(__file__), 'data_finder.yml')) as file: - CONFIG = yaml.safe_load(file) - - -def _augment_with_extra_facets(variable): - """Augment variable dict with extra facets.""" - extra_facets = esmvalcore.config._config.get_extra_facets( - variable['project'], - variable['dataset'], - variable['mip'], - variable['short_name'], - (), - ) - for (key, val) in extra_facets.items(): - if key not in variable: - variable[key] = val - - -def print_path(path): - """Print path.""" - txt = path - if os.path.isdir(path): - txt += '/' - if os.path.islink(path): - txt += ' -> ' + os.readlink(path) - print(txt) - - -def tree(path): - """Print path, similar to the the `tree` command.""" - print_path(path) - for dirpath, dirnames, filenames in os.walk(path): - for dirname in dirnames: - 
print_path(os.path.join(dirpath, dirname)) - for filename in filenames: - print_path(os.path.join(dirpath, filename)) - - -def create_file(filename): - """Create an empty file.""" - dirname = os.path.dirname(filename) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(filename, 'a'): - pass - - -def create_tree(path, filenames=None, symlinks=None): - """Create directory structure and files.""" - for filename in filenames or []: - create_file(os.path.join(path, filename)) - - for symlink in symlinks or []: - link_name = os.path.join(path, symlink['link_name']) - os.symlink(symlink['target'], link_name) - - -@pytest.mark.parametrize('cfg', CONFIG['get_output_file']) -def test_get_output_file(cfg): - """Test getting output name for preprocessed files.""" - _augment_with_extra_facets(cfg['variable']) - output_file = get_output_file(cfg['variable'], cfg['preproc_dir']) - assert output_file == cfg['output_file'] - - -@pytest.fixture -def root(): - """Root function for tests.""" - dirname = tempfile.mkdtemp() - yield os.path.join(dirname, 'output1') - print("Directory structure was:") - tree(dirname) - shutil.rmtree(dirname) - - -@pytest.mark.parametrize('cfg', CONFIG['get_input_filelist']) -def test_get_input_filelist(root, cfg): - """Test retrieving input filelist.""" - create_tree(root, cfg.get('available_files'), - cfg.get('available_symlinks')) - - # Augment variable dict with extra facets - _augment_with_extra_facets(cfg['variable']) - - # Find files - rootpath = {cfg['variable']['project']: [root]} - drs = {cfg['variable']['project']: cfg['drs']} - timerange = cfg['variable'].get('timerange') - if timerange and '*' in timerange: - (files, _, _) = _find_input_files(cfg['variable'], rootpath, drs) - ref_files = [ - os.path.join(root, file) for file in cfg['found_files']] - # Test result - assert sorted(files) == sorted(ref_files) - else: - (input_filelist, dirnames, - filenames) = get_input_filelist(cfg['variable'], rootpath, drs) - # Test result - ref_files = [os.path.join(root, file) for file in cfg['found_files']] - if cfg['dirs'] is None: - ref_dirs = [] - else: - ref_dirs = [os.path.join(root, dir) for dir in cfg['dirs']] - ref_patterns = cfg['file_patterns'] - - assert sorted(input_filelist) == sorted(ref_files) - assert sorted(dirnames) == sorted(ref_dirs) - assert sorted(filenames) == sorted(ref_patterns) diff --git a/tests/integration/test_local.py b/tests/integration/test_local.py new file mode 100644 index 0000000000..02982177a9 --- /dev/null +++ b/tests/integration/test_local.py @@ -0,0 +1,114 @@ +"""Tests for `esmvalcore.local`.""" +import os +import pprint +from pathlib import Path + +import pytest +import yaml + +from esmvalcore.config import CFG +from esmvalcore.local import LocalFile, _get_output_file, find_files + +# Load test configuration +with open(os.path.join(os.path.dirname(__file__), 'data_finder.yml')) as file: + CONFIG = yaml.safe_load(file) + + +def print_path(path): + """Print path.""" + txt = path + if os.path.isdir(path): + txt += '/' + if os.path.islink(path): + txt += ' -> ' + os.readlink(path) + print(txt) + + +def tree(path): + """Print path, similar to the `tree` command.""" + print_path(path) + for dirpath, dirnames, filenames in os.walk(path): + for dirname in dirnames: + print_path(os.path.join(dirpath, dirname)) + for filename in filenames: + print_path(os.path.join(dirpath, filename)) + + +def create_file(filename): + """Create an empty file.""" + dirname = os.path.dirname(filename) + if not os.path.exists(dirname): 
os.makedirs(dirname) + + with open(filename, 'a'): + pass + + +def create_tree(path, filenames=None, symlinks=None): + """Create directory structure and files.""" + for filename in filenames or []: + create_file(os.path.join(path, filename)) + + for symlink in symlinks or []: + link_name = os.path.join(path, symlink['link_name']) + os.symlink(symlink['target'], link_name) + + +@pytest.mark.parametrize('cfg', CONFIG['get_output_file']) +def test_get_output_file(cfg): + """Test getting output name for preprocessed files.""" + output_file = _get_output_file(cfg['variable'], cfg['preproc_dir']) + expected = Path(cfg['output_file']) + assert output_file == expected + + +@pytest.fixture +def root(tmp_path): + """Root directory fixture for tests.""" + dirname = str(tmp_path) + yield dirname + print("Directory structure was:") + tree(dirname) + + +@pytest.mark.parametrize('cfg', CONFIG['get_input_filelist']) +def test_find_files(monkeypatch, root, cfg): + """Test retrieving input filelist.""" + print(f"Testing DRS {cfg['drs']} with variable:\n", + pprint.pformat(cfg['variable'])) + project = cfg['variable']['project'] + monkeypatch.setitem(CFG, 'drs', {project: cfg['drs']}) + monkeypatch.setitem(CFG, 'rootpath', {project: root}) + create_tree(root, cfg.get('available_files'), + cfg.get('available_symlinks')) + + # Find files + input_filelist, globs = find_files(debug=True, **cfg['variable']) + # Test result + ref_files = [Path(root, file) for file in cfg['found_files']] + ref_globs = [ + Path(root, d, f) for d in cfg['dirs'] for f in cfg['file_patterns'] + ] + assert sorted([Path(f) for f in input_filelist]) == sorted(ref_files) + assert sorted([Path(g) for g in globs]) == sorted(ref_globs) + + +def test_find_files_with_facets(monkeypatch, root): + """Test that a LocalFile with populated `facets` is returned.""" + for cfg in CONFIG['get_input_filelist']: + if cfg['drs'] != 'default': + break + + project = cfg['variable']['project'] + monkeypatch.setitem(CFG, 'drs', {project: cfg['drs']}) + monkeypatch.setitem(CFG, 'rootpath', {project: root}) + + create_tree(root, cfg.get('available_files'), + cfg.get('available_symlinks')) + + # Find files + input_filelist = find_files(**cfg['variable']) + ref_files = [Path(root, file) for file in cfg['found_files']] + assert sorted([Path(f) for f in input_filelist]) == sorted(ref_files) + assert isinstance(input_filelist[0], LocalFile) + assert input_filelist[0].facets diff --git a/tests/integration/test_provenance.py b/tests/integration/test_provenance.py index 630a7c0517..091069f3b6 100644 --- a/tests/integration/test_provenance.py +++ b/tests/integration/test_provenance.py @@ -3,7 +3,7 @@ def get_file_record(prov, filename): - records = prov.get_record('file:' + filename) + records = prov.get_record(f'file:{filename}') assert records return records[0] diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 06a7dcdbb0..d4a348871d 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -136,7 +136,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename, preprocessor): 'diagnostic': 'diagnostic_name', 'ensemble': 'r1i1p1', 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'yr', 'institute': ['CCCma'], 'long_name': 'Total Chlorophyll Mass Concentration', @@ -161,7 +161,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename, preprocessor): 'diagnostic': 'diagnostic_name', 'ensemble': 'r1i1p1', 'exp': 'historical', 
- 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'yr', 'institute': ['CCCma'], 'long_name': 'Total Chlorophyll Mass Concentration', @@ -185,7 +185,7 @@ def _get_default_settings_for_chl(fix_dir, save_filename, preprocessor): 'diagnostic': 'diagnostic_name', 'ensemble': 'r1i1p1', 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'yr', 'institute': ['CCCma'], 'long_name': 'Total Chlorophyll Mass Concentration', @@ -595,7 +595,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'diagnostic': 'diagnostic_name', 'ensemble': 'r0i0p0', 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'fx', 'institute': ['CCCma'], 'long_name': 'Land Area Fraction', @@ -619,7 +619,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'diagnostic': 'diagnostic_name', 'ensemble': 'r0i0p0', 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'fx', 'institute': ['CCCma'], 'long_name': 'Land Area Fraction', @@ -642,7 +642,7 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'diagnostic': 'diagnostic_name', 'ensemble': 'r0i0p0', 'exp': 'historical', - 'filename': fix_dir.replace('_fixed', '.nc'), + 'filename': Path(fix_dir.replace('_fixed', '.nc')), 'frequency': 'fx', 'institute': ['CCCma'], 'long_name': 'Land Area Fraction', @@ -874,7 +874,7 @@ def test_simple_cordex_recipe(tmp_path, patched_datafinder, config_user): recipe = get_recipe(tmp_path, content, config_user) variable = recipe.diagnostics['test']['preprocessor_output']['tas'][0] - filename = variable.pop('filename').split('/')[-1] + filename = variable.pop('filename').name assert (filename == 'CORDEX_MOHC-HadGEM3-RA_v1_ECMWF-ERAINT_AFR-44_mon_evaluation_' 'r1i1p1_tas_1991-1993.nc') @@ -939,7 +939,7 @@ def test_simple_cordex_recipe(tmp_path, patched_datafinder, config_user): def test_recipe_iso_timerange(tmp_path, patched_datafinder, config_user, input_time, output_time): """Test recipe with timerange tag.""" - content = dedent(""" + content = dedent(f""" diagnostics: test: additional_datasets: @@ -948,44 +948,46 @@ def test_recipe_iso_timerange(tmp_path, patched_datafinder, config_user, exp: historical ensemble: r2i1p1f1 grid: gn - timerange: variables: pr: mip: 3hr + timerange: '{input_time}' areacella: mip: fx scripts: null """) - recipe = yaml.safe_load(content) - (recipe['diagnostics']['test']['additional_datasets'][0]['timerange'] - ) = input_time - content = yaml.safe_dump(recipe) - recipe = get_recipe(tmp_path, content, config_user) - variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0] - filename = variable.pop('filename').split('/')[-1] - assert (filename == 'CMIP6_HadGEM3-GC31-LL_3hr_historical_r2i1p1f1_' - f'pr_gn_{output_time}.nc') - fx_variable = ( - recipe.diagnostics['test']['preprocessor_output']['areacella'][0]) - fx_filename = fx_variable.pop('filename').split('/')[-1] - assert (fx_filename == - 'CMIP6_HadGEM3-GC31-LL_fx_historical_r2i1p1f1_areacella_gn.nc') + assert len(recipe.tasks) == 2 + pr_task = [t for t in recipe.tasks if t.name.endswith('pr')][0] + assert len(pr_task.products) == 1 + pr_product = pr_task.products.pop() + + filename = ('CMIP6_HadGEM3-GC31-LL_3hr_historical_r2i1p1f1_' + f'pr_gn_{output_time}.nc') + 
assert pr_product.filename.name == filename + + areacella_task = [t for t in recipe.tasks + if t.name.endswith('areacella')][0] + assert len(areacella_task.products) == 1 + areacella_product = areacella_task.products.pop() + + filename = 'CMIP6_HadGEM3-GC31-LL_fx_historical_r2i1p1f1_areacella_gn.nc' + assert areacella_product.filename.name == filename @pytest.mark.parametrize('input_time,output_time', TEST_ISO_TIMERANGE) def test_recipe_iso_timerange_as_dataset(tmp_path, patched_datafinder, config_user, input_time, output_time): """Test recipe with timerange tag in the datasets section.""" - content = dedent(""" + content = dedent(f""" datasets: - dataset: HadGEM3-GC31-LL project: CMIP6 exp: historical ensemble: r2i1p1f1 grid: gn - timerange: + timerange: '{input_time}' diagnostics: test: variables: @@ -996,18 +998,14 @@ def test_recipe_iso_timerange_as_dataset(tmp_path, patched_datafinder, scripts: null """) - recipe = yaml.safe_load(content) - (recipe['datasets'][0]['timerange']) = input_time - content = yaml.safe_dump(recipe) - recipe = get_recipe(tmp_path, content, config_user) variable = recipe.diagnostics['test']['preprocessor_output']['pr'][0] - filename = variable.pop('filename').split('/')[-1] + filename = variable.pop('filename').name assert (filename == 'CMIP6_HadGEM3-GC31-LL_3hr_historical_r2i1p1f1_' f'pr_gn_{output_time}.nc') fx_variable = ( recipe.diagnostics['test']['preprocessor_output']['areacella'][0]) - fx_filename = fx_variable.pop('filename').split('/')[-1] + fx_filename = fx_variable.pop('filename').name assert (fx_filename == 'CMIP6_HadGEM3-GC31-LL_fx_historical_r2i1p1f1_areacella_gn.nc') @@ -2584,8 +2582,8 @@ def test_empty_fxvar_dict(tmp_path, patched_datafinder, config_user): assert product.settings['add_fx_variables']['fx_variables'] == {} -def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): - content = dedent(""" +@pytest.mark.parametrize('content', [ + pytest.param(dedent(""" preprocessors: landmask: mask_landsea: @@ -2620,45 +2618,46 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): additional_datasets: - {dataset: CanESM2} scripts: null - """) - recipe = get_recipe(tmp_path, content, config_user) - - # Check custom fx variables - task = recipe.tasks.pop() - product = task.products.pop() - - # landsea - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - fx_variables = product.settings['add_fx_variables']['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 4 - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] - - # landseaice - settings = product.settings['mask_landseaice'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - assert '_fx_' in fx_variables['sftgif']['filename'] - assert '_piControl_' in fx_variables['sftgif']['filename'] - - # volume statistics - settings = product.settings['volume_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - assert 'volcello' in fx_variables - - # area statistics - settings = product.settings['area_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - assert '_fx_' in fx_variables['areacello']['filename'] - assert '_piControl_' in fx_variables['areacello']['filename'] - - -def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): + """), + id='fx_variables_as_dict_of_dicts'), + pytest.param(dedent(""" + preprocessors: + landmask: + 
mask_landsea: + mask_out: sea + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}] + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + volume_statistics: + operator: mean + area_statistics: + operator: mean + fx_variables: [{'short_name': 'areacello', 'mip': 'fx', + 'exp': 'piControl'}] + diagnostics: + diagnostic_name: + variables: + gpp: + preprocessor: landmask + project: CMIP5 + mip: Lmon + exp: historical + start_year: 2000 + end_year: 2005 + ensemble: r1i1p1 + additional_datasets: + - {dataset: CanESM2} + scripts: null + """), + id='fx_variables_as_list_of_dicts'), +]) +def test_user_defined_fxvar( + tmp_path, + patched_datafinder, + config_user, + content, +): content = dedent(""" preprocessors: landmask: @@ -2702,15 +2701,15 @@ def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 4 - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'].name + assert '_piControl_' in fx_variables['sftlf']['filename'].name # landseaice settings = product.settings['mask_landseaice'] assert len(settings) == 1 assert settings['mask_out'] == 'sea' - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'].name + assert '_piControl_' in fx_variables['sftlf']['filename'].name # volume statistics settings = product.settings['volume_statistics'] @@ -2722,8 +2721,8 @@ def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - assert '_fx_' in fx_variables['areacello']['filename'] - assert '_piControl_' in fx_variables['areacello']['filename'] + assert '_fx_' in fx_variables['areacello']['filename'].name + assert '_piControl_' in fx_variables['areacello']['filename'].name def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): @@ -2779,14 +2778,17 @@ def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: preproc: - area_statistics: - operator: mean - fx_variables: - sftgif: - mip: fx - volcello: - ensemble: r2i1p1f1 - mip: Ofx + volume_statistics: + operator: mean + fx_variables: + volcello: + ensemble: r2i1p1f1 + mip: Ofx + mask_landseaice: + mask_out: ice + fx_variables: + sftgif: + mip: fx diagnostics: diagnostic_name: @@ -2813,9 +2815,9 @@ def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): assert len(task.products) == 1 product = task.products.pop() - # Check area_statistics - assert 'area_statistics' in product.settings - settings = product.settings['area_statistics'] + # Check volume_statistics + assert 'volume_statistics' in product.settings + settings = product.settings['volume_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' @@ -2823,9 +2825,9 @@ def test_fx_vars_fixed_mip_cmip6(tmp_path, patched_datafinder, config_user): fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 2 - assert '_fx_' in fx_variables['sftgif']['filename'] - assert '_r2i1p1f1_' in fx_variables['volcello']['filename'] - assert '_Ofx_' in fx_variables['volcello']['filename'] + 
assert '_fx_' in fx_variables['sftgif']['filename'].name + assert '_r2i1p1f1_' in fx_variables['volcello']['filename'].name + assert '_Ofx_' in fx_variables['volcello']['filename'].name def test_fx_vars_invalid_mip_cmip6(tmp_path, patched_datafinder, config_user): @@ -2960,11 +2962,11 @@ def test_fx_vars_mip_search_cmip6(tmp_path, patched_datafinder, config_user): fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 5 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_Ofx_' in fx_variables['sftof']['filename'] + assert '_fx_' in fx_variables['areacella']['filename'].name + assert '_Ofx_' in fx_variables['areacello']['filename'].name + assert '_Efx_' in fx_variables['clayfrac']['filename'].name + assert '_fx_' in fx_variables['sftlf']['filename'].name + assert '_Ofx_' in fx_variables['sftof']['filename'].name def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): @@ -2972,15 +2974,18 @@ def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: preproc: - area_statistics: - operator: mean - fx_variables: [ - 'areacella', - 'areacello', - 'clayfrac', - 'sftlf', - 'sftof', - ] + area_statistics: + operator: mean + fx_variables: [ + 'areacella', + 'areacello', + ] + mask_landsea: + mask_out: sea + fx_variables: [ + 'sftlf', + 'sftof', + ] diagnostics: diagnostic_name: @@ -3016,12 +3021,11 @@ def test_fx_list_mip_search_cmip6(tmp_path, patched_datafinder, config_user): # Check add_fx_variables fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 5 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_Ofx_' in fx_variables['sftof']['filename'] + assert len(fx_variables) == 4 + assert '_fx_' in fx_variables['areacella']['filename'].name + assert '_Ofx_' in fx_variables['areacello']['filename'].name + assert '_fx_' in fx_variables['sftlf']['filename'].name + assert '_Ofx_' in fx_variables['sftof']['filename'].name def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -3070,8 +3074,8 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' not in fx_variables['volcello']['filename'] - assert '_Ofx_' in fx_variables['volcello']['filename'] + assert '_Omon_' not in fx_variables['volcello']['filename'].name + assert '_Ofx_' in fx_variables['volcello']['filename'].name def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -3119,9 +3123,9 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello']['filename'][0] - assert '_piControl_' in fx_variables['volcello']['filename'][0] - assert '_Omon_' not in fx_variables['volcello']['filename'][0] + assert '_Oyr_' in fx_variables['volcello']['filename'][0].name + assert 
'_piControl_' in fx_variables['volcello']['filename'][0].name + assert '_Omon_' not in fx_variables['volcello']['filename'][0].name def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -3221,8 +3225,8 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello']['filename'][0] - assert '_Omon_' in fx_variables['volcello']['filename'][0] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0].name + assert '_Omon_' in fx_variables['volcello']['filename'][0].name def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -3269,8 +3273,8 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello']['filename'][0] - assert '_Oyr_' in fx_variables['volcello']['filename'][0] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0].name + assert '_Oyr_' in fx_variables['volcello']['filename'][0].name def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -3315,8 +3319,8 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['volcello']['filename'] - assert '_Omon_' not in fx_variables['volcello']['filename'] + assert '_fx_' in fx_variables['volcello']['filename'].name + assert '_Omon_' not in fx_variables['volcello']['filename'].name def test_wrong_project(tmp_path, patched_datafinder, config_user): @@ -3434,8 +3438,8 @@ def test_unique_fx_var_in_multiple_mips_cmip6(tmp_path, content = dedent(""" preprocessors: preproc: - area_statistics: - operator: mean + mask_landseaice: + mask_out: ice fx_variables: sftgif: @@ -3464,11 +3468,11 @@ def test_unique_fx_var_in_multiple_mips_cmip6(tmp_path, assert len(task.products) == 1 product = task.products.pop() - # Check area_statistics - assert 'area_statistics' in product.settings - settings = product.settings['area_statistics'] + # Check mask_landseaice + assert 'mask_landseaice' in product.settings + settings = product.settings['mask_landseaice'] assert len(settings) == 1 - assert settings['operator'] == 'mean' + assert settings['mask_out'] == 'ice' # Check add_fx_variables # Due to failing datafinder, only files in LImon are found even though @@ -3479,7 +3483,7 @@ def test_unique_fx_var_in_multiple_mips_cmip6(tmp_path, sftgif_files = fx_variables['sftgif']['filename'] assert isinstance(sftgif_files, list) assert len(sftgif_files) == 1 - assert '_LImon_' in sftgif_files[0] + assert '_LImon_' in sftgif_files[0].name def test_multimodel_mask(tmp_path, patched_datafinder, config_user): @@ -3639,8 +3643,8 @@ def test_dataset_to_file_derived_var(mock_get_input_files, mock_data_availability, config_user): """Test ``_dataset_to_file`` with derived variable.""" mock_get_input_files.side_effect = [ - ([], [], []), - ([sentinel.out_file], [sentinel.dirname], [sentinel.filename]), + ([], []), + ([sentinel.out_file], [sentinel.globs]), ] variable = { 'dataset': 'ICON', diff --git a/tests/integration/test_recipe_checks.py b/tests/integration/test_recipe_checks.py index 065a6e1301..08e9d39624 100644 --- 
a/tests/integration/test_recipe_checks.py +++ b/tests/integration/test_recipe_checks.py @@ -1,4 +1,5 @@ """Integration tests for :mod:`esmvalcore._recipe_checks`.""" +import os.path from typing import Any, List from unittest import mock @@ -11,13 +12,8 @@ from esmvalcore.preprocessor import PreprocessorFile ERR_ALL = 'Looked for files matching%s' -ERR_D = ('Looked for files in %s, but did not find any file pattern to match ' - 'against') -ERR_F = ('Looked for files matching %s, but did not find any existing input ' - 'directory') ERR_RANGE = 'No input data available for years {} in files:\n{}' VAR = { - 'filename': 'a/c.nc', 'frequency': 'mon', 'short_name': 'tas', 'timerange': '2020/2025', @@ -26,7 +22,6 @@ 'end_year': 2025 } FX_VAR = { - 'filename': 'a/b.nc', 'frequency': 'fx', 'short_name': 'areacella', } @@ -69,17 +64,8 @@ def test_data_availability_data(mock_logger, input_files, var, error): DATA_AVAILABILITY_NO_DATA: List[Any] = [ ([], [], None), - ([], None, None), - (None, [], None), - (None, None, None), - (['dir1'], [], (ERR_D, ['dir1'])), - (['dir1', 'dir2'], [], (ERR_D, ['dir1', 'dir2'])), - (['dir1'], None, (ERR_D, ['dir1'])), - (['dir1', 'dir2'], None, (ERR_D, ['dir1', 'dir2'])), - ([], ['a*.nc'], (ERR_F, ['a*.nc'])), - ([], ['a*.nc', 'b*.nc'], (ERR_F, ['a*.nc', 'b*.nc'])), - (None, ['a*.nc'], (ERR_F, ['a*.nc'])), - (None, ['a*.nc', 'b*.nc'], (ERR_F, ['a*.nc', 'b*.nc'])), + ([''], ['a*.nc'], (ERR_ALL, ': a*.nc')), + ([''], ['a*.nc', 'b*.nc'], (ERR_ALL, '\na*.nc\nb*.nc')), (['1'], ['a'], (ERR_ALL, ': 1/a')), (['1'], ['a', 'b'], (ERR_ALL, '\n1/a\n1/b')), (['1', '2'], ['a'], (ERR_ALL, '\n1/a\n2/a')), @@ -100,10 +86,13 @@ def test_data_availability_no_data(mock_logger, dirnames, filenames, error): 'start_year': 2020, 'end_year': 2025 } + patterns = [ + os.path.join(d, f) for d in dirnames for f in filenames + ] error_first = ('No input files found for variable %s', var_no_filename) error_last = ("Set 'log_level' to 'debug' to get more information", ) with pytest.raises(RecipeError) as rec_err: - check.data_availability([], var, dirnames, filenames) + check.data_availability([], var, patterns) assert str(rec_err.value) == 'Missing data for alias: tas' if error is None: assert mock_logger.error.call_count == 2 @@ -137,7 +126,7 @@ def test_data_availability_no_data(mock_logger, dirnames, filenames, error): '*/P2Y21DT12H00M00S', '1/301', '1/*', - '*/301' + '*/301', ] @@ -161,7 +150,7 @@ def test_valid_time_selection(timerange): @pytest.mark.parametrize('timerange,message', BAD_TIMERANGES) -def test_valid_time_selection_rehections(timerange, message): +def test_valid_time_selection_rejections(timerange, message): """Check that bad definitions raise RecipeError.""" with pytest.raises(check.RecipeError) as rec_err: check.valid_time_selection(timerange) @@ -189,7 +178,7 @@ def test_data_availability_nonexistent(tmp_path): ) dest_folder = tmp_path input_files = [esmvalcore.esgf.ESGFFile([result]).local_file(dest_folder)] - check.data_availability(input_files, var, dirnames=[], filenames=[]) + check.data_availability(input_files, var, patterns=[]) def test_reference_for_bias_preproc_empty(): diff --git a/tests/sample_data/experimental/test_run_recipe.py b/tests/sample_data/experimental/test_run_recipe.py index 053385d821..86f31a438a 100644 --- a/tests/sample_data/experimental/test_run_recipe.py +++ b/tests/sample_data/experimental/test_run_recipe.py @@ -40,7 +40,7 @@ def recipe(): @pytest.mark.use_sample_data @pytest.mark.parametrize('task', (None, 'example/ta')) -def test_run_recipe(task, 
recipe, tmp_path): +def test_run_recipe(monkeypatch, task, recipe, tmp_path): """Test running a basic recipe using sample data. Recipe contains no provenance and no diagnostics. @@ -50,12 +50,13 @@ def test_run_recipe(task, recipe, tmp_path): assert isinstance(recipe, Recipe) assert isinstance(recipe._repr_html_(), str) + sample_data_config = esmvaltool_sample_data.get_rootpaths() + monkeypatch.setitem(CFG, 'rootpath', sample_data_config['rootpath']) + monkeypatch.setitem(CFG, 'drs', {'CMIP6': 'SYNDA'}) session = CFG.start_session(recipe.path.stem) session.clear() session.update(CFG_DEFAULT) session['output_dir'] = tmp_path / 'esmvaltool_output' - session.update(esmvaltool_sample_data.get_rootpaths()) - session['drs'] = {'CMIP6': 'SYNDA'} session['max_parallel_tasks'] = 1 session['remove_preproc_dir'] = False @@ -82,14 +83,14 @@ def test_run_recipe(task, recipe, tmp_path): @pytest.mark.use_sample_data -def test_run_recipe_diagnostic_failing(recipe, tmp_path): +def test_run_recipe_diagnostic_failing(monkeypatch, recipe, tmp_path): """Test running a single diagnostic using sample data. Recipe contains no provenance and no diagnostics. """ TAGS.set_tag_values(AUTHOR_TAGS) - CFG['output_dir'] = tmp_path + monkeypatch.setitem(CFG, 'output_dir', tmp_path) session = CFG.start_session(recipe.path.stem) diff --git a/tests/unit/data_finder/__init__.py b/tests/unit/local/__init__.py similarity index 100% rename from tests/unit/data_finder/__init__.py rename to tests/unit/local/__init__.py diff --git a/tests/unit/local/test_facets.py b/tests/unit/local/test_facets.py new file mode 100644 index 0000000000..1f7f3c35af --- /dev/null +++ b/tests/unit/local/test_facets.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from esmvalcore.local import LocalFile, _path2facets + + +def test_path2facets(): + """Test `_path2facets`.""" + filepath = Path("/climate_data/value1/value2/filename.nc") + drs = "{facet1}/{facet2.lower}" + + expected = { + 'facet1': 'value1', + 'facet2': 'value2', + } + + result = _path2facets(filepath, drs) + + assert result == expected + + +def test_localfile(): + file = LocalFile('/a/b.nc') + file.facets = {'a': 'A'} + assert Path(file) == Path('/a/b.nc') + assert file.facets == {'a': 'A'} diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/local/test_replace_tags.py similarity index 54% rename from tests/unit/data_finder/test_replace_tags.py rename to tests/unit/local/test_replace_tags.py index 9215e8ebd5..38d0f63f75 100644 --- a/tests/unit/data_finder/test_replace_tags.py +++ b/tests/unit/local/test_replace_tags.py @@ -1,8 +1,10 @@ -"""Tests for _replace_tags in _data_finder.py.""" +"""Tests for `_replace_tags` in `esmvalcore.local`.""" +from pathlib import Path + import pytest -from esmvalcore._data_finder import _replace_tags from esmvalcore.exceptions import RecipeError +from esmvalcore.local import _replace_tags VARIABLE = { 'project': 'CMIP6', @@ -18,20 +20,23 @@ def test_replace_tags(): - """Tests for get_start_end_year function.""" + """Tests for `_replace_tags` function.""" path = _replace_tags( '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' - '{grid}/{latestversion}', VARIABLE) + '{grid}/{version}', VARIABLE) input_file = _replace_tags( '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE) output_file = _replace_tags( '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE) assert path == [ - 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' - '{latestversion}' 
Path('act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/*') + ] + assert input_file == [ + Path('tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc') + ] + assert output_file == [ + Path('CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas') ] - assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc'] - assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas'] def test_replace_tags_missing_facet(): @@ -45,34 +50,38 @@ def test_replace_tags_missing_facet(): def test_replace_tags_list_of_str(): - assert sorted( - _replace_tags(('folder/subfolder/{short_name}', 'folder2/{short_name}', - 'subfolder/{short_name}'), VARIABLE)) == sorted([ - 'folder2/tas', - 'folder/subfolder/tas', - 'subfolder/tas', - ]) + paths = [ + 'folder/subfolder/{short_name}', + 'folder2/{short_name}', + 'subfolder/{short_name}', + ] + reference = [ + Path('folder/subfolder/tas'), + Path('folder2/tas'), + Path('subfolder/tas'), + ] + assert sorted(_replace_tags(paths, VARIABLE)) == reference def test_replace_tags_with_subexperiment(): - """Tests for get_start_end_year function.""" + """Tests for `_replace_tags` function.""" variable = {'sub_experiment': '199411', **VARIABLE} - path = _replace_tags( + paths = _replace_tags( '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' - '{grid}/{latestversion}', variable) + '{grid}/{version}', variable) input_file = _replace_tags( '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable) output_file = _replace_tags( '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable) - assert sorted(path) == sorted([ - 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' - '{latestversion}', - 'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/' - '{latestversion}' - ]) + expected_paths = [ + Path( + 'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/*'), + Path('act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/*'), + ] + assert sorted(paths) == expected_paths assert input_file == [ - 'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc' + Path('tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc') ] assert output_file == [ - 'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas' + Path('CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas') ] diff --git a/tests/unit/data_finder/test_select_files.py b/tests/unit/local/test_select_files.py similarity index 87% rename from tests/unit/data_finder/test_select_files.py rename to tests/unit/local/test_select_files.py index a3dcf7618b..162cfc89c5 100644 --- a/tests/unit/data_finder/test_select_files.py +++ b/tests/unit/local/test_select_files.py @@ -1,4 +1,4 @@ -from esmvalcore._data_finder import select_files +from esmvalcore.local import _select_files def test_select_files(): @@ -10,7 +10,7 @@ def test_select_files(): "pr_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_197001-197412.nc", ] - result = select_files(files, '1962/1967') + result = _select_files(files, '1962/1967') expected = [ "pr_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_196001-196412.nc", @@ -29,7 +29,7 @@ def test_select_files_monthly_resolution(): "pr_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196311-196410.nc", ] - result = select_files(files, '196201/196205') + result = _select_files(files, '196201/196205') expected = [ "pr_Amon_EC-Earth3_dcppA-hindcast_s1960-r1i1p1f1_gr_196111-196210.nc" @@ -48,7 +48,7 @@ def test_select_files_daily_resolution(): filename + "19621101-19631031.nc" ] - result = select_files(files, '19600101/19611215') + result = 
_select_files(files, '19600101/19611215') expected = [ filename + "19601101-19611031.nc", @@ -73,10 +73,10 @@ def test_select_files_sub_daily_resolution(): filename + "19621101T0300-19631031T2100.nc", ] - result_no_separator = select_files( + result_no_separator = _select_files( files_no_separator, '19600101T0900/19610101T09HH00MM') - result_separator = select_files( + result_separator = _select_files( files_separator, '19600101T0900/19610101T0900') @@ -113,8 +113,8 @@ def test_select_files_time_period(): filename_datetime + "196211010300-196310312100.nc", ] - result_date = select_files(files_date, '196211/P2Y5M') - result_datetime = select_files(files_datetime, '19601101T1300/P1Y0M0DT6H') + result_date = _select_files(files_date, '196211/P2Y5M') + result_datetime = _select_files(files_datetime, '19601101T1300/P1Y0M0DT6H') expected_date = [ filename_date + "196211-196310.nc", @@ -142,9 +142,9 @@ def test_select_files_varying_format(): filename + "196211010300-196310312100.nc", ] - result_yearly = select_files(files, '1960/1962') - result_monthly = select_files(files, '196011/196210') - result_daily = select_files(files, '19601101/19601105') + result_yearly = _select_files(files, '1960/1962') + result_monthly = _select_files(files, '196011/196210') + result_daily = _select_files(files, '19601101/19601105') assert result_yearly == files assert result_monthly == files[0:2] diff --git a/tests/unit/data_finder/test_get_start_end_year.py b/tests/unit/local/test_time.py similarity index 67% rename from tests/unit/data_finder/test_get_start_end_year.py rename to tests/unit/local/test_time.py index 1fb02c2e9b..257cf0ed4e 100644 --- a/tests/unit/data_finder/test_get_start_end_year.py +++ b/tests/unit/local/test_time.py @@ -1,12 +1,13 @@ -"""Unit tests for :func:`esmvalcore._data_finder.regrid._stock_cube`""" - +"""Unit tests for time-related functions in `esmvalcore.local`.""" import iris import pytest -from esmvalcore._data_finder import ( +from esmvalcore.local import ( + _dates_to_timerange, + _get_start_end_date, + _get_start_end_year, _get_timerange_from_years, - get_start_end_date, - get_start_end_year, + _truncate_dates, ) FILENAME_CASES = [ @@ -61,32 +62,32 @@ @pytest.mark.parametrize('case', FILENAME_CASES) def test_get_start_end_year(case): - """Tests for get_start_end_year function.""" + """Tests for _get_start_end_year function.""" filename, case_start, case_end = case if case_start is None and case_end is None: # If the filename is inconclusive or too difficult # we resort to reading the file, which fails here # because the file is not there. with pytest.raises(ValueError): - get_start_end_year(filename) + _get_start_end_year(filename) else: - start, end = get_start_end_year(filename) + start, end = _get_start_end_year(filename) assert case_start == start assert case_end == end @pytest.mark.parametrize('case', FILENAME_DATE_CASES) def test_get_start_end_date(case): - """Tests for get_start_end_date function.""" + """Tests for _get_start_end_date function.""" filename, case_start, case_end = case if case_start is None and case_end is None: # If the filename is inconclusive or too difficult # we resort to reading the file, which fails here # because the file is not there. 
with pytest.raises(ValueError): - get_start_end_date(filename) + _get_start_end_date(filename) else: - start, end = get_start_end_date(filename) + start, end = _get_start_end_date(filename) assert case_start == start assert case_end == end @@ -101,7 +102,7 @@ def test_read_time_from_cube(monkeypatch, tmp_path): units='days since 1990-01-01') cube.add_dim_coord(time, 0) iris.save(cube, temp_file) - start, end = get_start_end_year(temp_file) + start, end = _get_start_end_year(temp_file) assert start == 1990 assert end == 1991 @@ -116,7 +117,7 @@ def test_read_datetime_from_cube(monkeypatch, tmp_path): units='days since 1990-01-01') cube.add_dim_coord(time, 0) iris.save(cube, temp_file) - start, end = get_start_end_date(temp_file) + start, end = _get_start_end_date(temp_file) assert start == '19900101' assert end == '19910102' @@ -128,14 +129,14 @@ def test_raises_if_unable_to_deduce(monkeypatch, tmp_path): cube = iris.cube.Cube([0, 0], var_name='var') iris.save(cube, temp_file) with pytest.raises(ValueError): - get_start_end_date(temp_file) + _get_start_end_date(temp_file) def test_fails_if_no_date_present(): """Test raises if no date is present.""" with pytest.raises((ValueError, OSError)): - get_start_end_date('var_whatever') - get_start_end_year('var_whatever') + _get_start_end_date('var_whatever') + _get_start_end_year('var_whatever') def test_get_timerange_from_years(): @@ -176,3 +177,65 @@ def test_get_timerange_from_end_year(): assert 'end_year' not in variable assert variable['timerange'] == '2002/2002' + + +TEST_DATES_TO_TIMERANGE = [ + (2000, 2000, '2000/2000'), + (1, 2000, '0001/2000'), + (2000, 1, '2000/0001'), + (1, 2, '0001/0002'), + ('2000', '2000', '2000/2000'), + ('1', '2000', '0001/2000'), + (2000, '1', '2000/0001'), + ('1', 2, '0001/0002'), + ('*', '*', '*/*'), + (2000, '*', '2000/*'), + ('2000', '*', '2000/*'), + (1, '*', '0001/*'), + ('1', '*', '0001/*'), + ('*', 2000, '*/2000'), + ('*', '2000', '*/2000'), + ('*', 1, '*/0001'), + ('*', '1', '*/0001'), + ('P5Y', 'P5Y', 'P5Y/P5Y'), + (2000, 'P5Y', '2000/P5Y'), + ('2000', 'P5Y', '2000/P5Y'), + (1, 'P5Y', '0001/P5Y'), + ('1', 'P5Y', '0001/P5Y'), + ('P5Y', 2000, 'P5Y/2000'), + ('P5Y', '2000', 'P5Y/2000'), + ('P5Y', 1, 'P5Y/0001'), + ('P5Y', '1', 'P5Y/0001'), + ('*', 'P5Y', '*/P5Y'), + ('P5Y', '*', 'P5Y/*'), +] + + +@pytest.mark.parametrize('start_date,end_date,expected_timerange', + TEST_DATES_TO_TIMERANGE) +def test_dates_to_timerange(start_date, end_date, expected_timerange): + """Test ``_dates_to_timerange``.""" + timerange = _dates_to_timerange(start_date, end_date) + assert timerange == expected_timerange + + +TEST_TRUNCATE_DATES = [ + ('2000', '2000', (2000, 2000)), + ('200001', '2000', (2000, 2000)), + ('2000', '200001', (2000, 2000)), + ('200001', '2000', (2000, 2000)), + ('200001', '200001', (200001, 200001)), + ('20000102', '200001', (200001, 200001)), + ('200001', '20000102', (200001, 200001)), + ('20000102', '20000102', (20000102, 20000102)), + ('20000102T23:59:59', '20000102', (20000102, 20000102)), + ('20000102', '20000102T23:59:59', (20000102, 20000102)), + ('20000102T235959', '20000102T01:02:03', (20000102235959, 20000102010203)), +] + + +@pytest.mark.parametrize('date,date_file,expected_output', TEST_TRUNCATE_DATES) +def test_truncate_dates(date, date_file, expected_output): + """Test ``_truncate_dates``.""" + output = _truncate_dates(date, date_file) + assert output == expected_output diff --git a/tests/unit/test_data_finder.py b/tests/unit/test_data_finder.py deleted file mode 100644 index 
5210b9b2cb..0000000000 --- a/tests/unit/test_data_finder.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Unit tests for ``_data_finder.py``.""" -import pytest - -from esmvalcore._data_finder import _truncate_dates, dates_to_timerange - -TEST_DATES_TO_TIMERANGE = [ - (2000, 2000, '2000/2000'), - (1, 2000, '0001/2000'), - (2000, 1, '2000/0001'), - (1, 2, '0001/0002'), - ('2000', '2000', '2000/2000'), - ('1', '2000', '0001/2000'), - (2000, '1', '2000/0001'), - ('1', 2, '0001/0002'), - ('*', '*', '*/*'), - (2000, '*', '2000/*'), - ('2000', '*', '2000/*'), - (1, '*', '0001/*'), - ('1', '*', '0001/*'), - ('*', 2000, '*/2000'), - ('*', '2000', '*/2000'), - ('*', 1, '*/0001'), - ('*', '1', '*/0001'), - ('P5Y', 'P5Y', 'P5Y/P5Y'), - (2000, 'P5Y', '2000/P5Y'), - ('2000', 'P5Y', '2000/P5Y'), - (1, 'P5Y', '0001/P5Y'), - ('1', 'P5Y', '0001/P5Y'), - ('P5Y', 2000, 'P5Y/2000'), - ('P5Y', '2000', 'P5Y/2000'), - ('P5Y', 1, 'P5Y/0001'), - ('P5Y', '1', 'P5Y/0001'), - ('*', 'P5Y', '*/P5Y'), - ('P5Y', '*', 'P5Y/*'), -] - - -@pytest.mark.parametrize('start_date,end_date,expected_timerange', - TEST_DATES_TO_TIMERANGE) -def test_dates_to_timerange(start_date, end_date, expected_timerange): - """Test ``dates_to_timerange``.""" - timerange = dates_to_timerange(start_date, end_date) - assert timerange == expected_timerange - - -TEST_TRUNCATE_DATES = [ - ('2000', '2000', (2000, 2000)), - ('200001', '2000', (2000, 2000)), - ('2000', '200001', (2000, 2000)), - ('200001', '2000', (2000, 2000)), - ('200001', '200001', (200001, 200001)), - ('20000102', '200001', (200001, 200001)), - ('200001', '20000102', (200001, 200001)), - ('20000102', '20000102', (20000102, 20000102)), - ('20000102T23:59:59', '20000102', (20000102, 20000102)), - ('20000102', '20000102T23:59:59', (20000102, 20000102)), - ('20000102T235959', '20000102T01:02:03', (20000102235959, 20000102010203)), -] - - -@pytest.mark.parametrize('date,date_file,expected_output', TEST_TRUNCATE_DATES) -def test_truncate_dates(date, date_file, expected_output): - """Test ``_truncate_dates``.""" - output = _truncate_dates(date, date_file) - assert output == expected_output diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index d74f570370..30c53aaf89 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -1,4 +1,5 @@ from collections import defaultdict +from pathlib import Path from unittest import mock import iris @@ -260,9 +261,9 @@ def test_search_esgf(mocker, tmp_path, local_availability, already_downloaded): local_files = local_file_options[local_availability] mocker.patch.object(_recipe, - 'get_input_filelist', + 'find_files', autospec=True, - return_value=(list(local_files), [], [])) + return_value=(list(local_files), [])) mocker.patch.object( _recipe.esgf, 'find_files', @@ -310,9 +311,9 @@ def test_search_esgf_timerange(mocker, tmp_path, timerange): esgf_files = create_esgf_search_results() mocker.patch.object(_recipe, - '_find_input_files', + 'find_files', autospec=True, - return_value=([], [], [])) + return_value=[]) mocker.patch.object( _recipe.esgf, 'find_files', @@ -464,8 +465,10 @@ def test_update_multiproduct_multi_model_statistics(): assert len(output) == 2 filenames = [p.filename for p in output] - assert '/preproc/d/var/CMIP6_MultiModelMean_2002-2004.nc' in filenames - assert '/preproc/d/var/CMIP6_MultiModelStd_Dev_2002-2004.nc' in filenames + assert Path( + '/preproc/d/var/CMIP6_MultiModelMean_2002-2004.nc') in filenames + assert Path( + '/preproc/d/var/CMIP6_MultiModelStd_Dev_2002-2004.nc') in filenames for product in output: for 
attr in common_attributes: @@ -480,12 +483,12 @@ def test_update_multiproduct_multi_model_statistics(): assert product.attributes['start_year'] == 2002 assert 'end_year' in product.attributes assert product.attributes['end_year'] == 2004 - if 'MultiModelStd_Dev' in product.filename: + if 'MultiModelStd_Dev' in str(product.filename): assert product.attributes['alias'] == 'MultiModelStd_Dev' assert product.attributes['dataset'] == 'MultiModelStd_Dev' assert (product.attributes['multi_model_statistics'] == 'MultiModelStd_Dev') - elif 'MultiModelMean' in product.filename: + elif 'MultiModelMean' in str(product.filename): assert product.attributes['alias'] == 'MultiModelMean' assert product.attributes['dataset'] == 'MultiModelMean' assert (product.attributes['multi_model_statistics'] == @@ -498,8 +501,8 @@ def test_update_multiproduct_multi_model_statistics(): assert len(stats) == 2 assert 'mean' in stats assert 'std_dev' in stats - assert 'MultiModelMean' in stats['mean'].filename - assert 'MultiModelStd_Dev' in stats['std_dev'].filename + assert 'MultiModelMean' in str(stats['mean'].filename) + assert 'MultiModelStd_Dev' in str(stats['std_dev'].filename) def test_update_multiproduct_ensemble_statistics(): @@ -536,8 +539,8 @@ def test_update_multiproduct_ensemble_statistics(): assert len(output) == 1 product = list(output)[0] - assert (product.filename == - '/preproc/d/var/CMIP6_CanESM2_EnsembleMedian_2000-2000.nc') + assert product.filename == Path( + '/preproc/d/var/CMIP6_CanESM2_EnsembleMedian_2000-2000.nc') for attr in common_attributes: assert attr in product.attributes @@ -559,8 +562,8 @@ def test_update_multiproduct_ensemble_statistics(): stats = output_products['CMIP6_CanESM2'] assert len(stats) == 1 assert 'median' in stats - assert (stats['median'].filename == - '/preproc/d/var/CMIP6_CanESM2_EnsembleMedian_2000-2000.nc') + assert stats['median'].filename == Path( + '/preproc/d/var/CMIP6_CanESM2_EnsembleMedian_2000-2000.nc') def test_update_multiproduct_no_product():