diff --git a/doc/sphinx/source/api/esmvaltool.diag_scripts.portrait_plot.rst b/doc/sphinx/source/api/esmvaltool.diag_scripts.portrait_plot.rst
new file mode 100644
index 0000000000..68e18613db
--- /dev/null
+++ b/doc/sphinx/source/api/esmvaltool.diag_scripts.portrait_plot.rst
@@ -0,0 +1,11 @@
+
+.. _api.esmvaltool.diag_scripts.portrait_plot:
+
+Portrait Plot
+=============
+
+
+.. automodule:: esmvaltool.diag_scripts.portrait_plot
+  :no-members:
+  :no-inherited-members:
+  :no-show-inheritance:
diff --git a/doc/sphinx/source/api/esmvaltool.rst b/doc/sphinx/source/api/esmvaltool.rst
index b080b81ac8..6eda5f5912 100644
--- a/doc/sphinx/source/api/esmvaltool.rst
+++ b/doc/sphinx/source/api/esmvaltool.rst
@@ -29,3 +29,4 @@ Diagnostic Scripts
    esmvaltool.diag_scripts.ocean
    esmvaltool.diag_scripts.psyplot_diag
    esmvaltool.diag_scripts.seaborn_diag
+   esmvaltool.diag_scripts.portrait_plot
diff --git a/doc/sphinx/source/recipes/figures/portrait/portrait_plot.png b/doc/sphinx/source/recipes/figures/portrait/portrait_plot.png
new file mode 100644
index 0000000000..a6020c2d37
Binary files /dev/null and b/doc/sphinx/source/recipes/figures/portrait/portrait_plot.png differ
diff --git a/doc/sphinx/source/recipes/index.rst b/doc/sphinx/source/recipes/index.rst
index 9943989b8f..33fbb13126 100644
--- a/doc/sphinx/source/recipes/index.rst
+++ b/doc/sphinx/source/recipes/index.rst
@@ -23,6 +23,7 @@ large variety of input data.
 
    recipe_model_evaluation
    recipe_monitor
+   recipe_portrait
    recipe_psyplot
    recipe_seaborn
 
diff --git a/doc/sphinx/source/recipes/recipe_perfmetrics.rst b/doc/sphinx/source/recipes/recipe_perfmetrics.rst
index 067b65af85..d2f82dbffc 100644
--- a/doc/sphinx/source/recipes/recipe_perfmetrics.rst
+++ b/doc/sphinx/source/recipes/recipe_perfmetrics.rst
@@ -3,12 +3,27 @@
 Performance metrics for essential climate parameters
 ====================================================
 
+.. note::
+  Some of the results of this diagnostics can also be reproduced utilizing
+  python diagnostics:
+  Portrait plot: :ref:`recipe_portrait`
+  Monitoring: :ref:`recipe_monitor`
+
 Overview
 --------
 
-The goal is to create a standard recipe for the calculation of performance metrics to quantify the ability of the models to reproduce the climatological mean annual cycle for selected "Essential Climate Variables" (ECVs) plus some additional corresponding diagnostics and plots to better understand and interpret the results.
+The goal is to create a standard recipe for the calculation of performance
+metrics to quantify the ability of the models to reproduce the climatological
+mean annual cycle for selected "Essential Climate Variables" (ECVs) plus some
+additional corresponding diagnostics and plots to better understand and
+interpret the results.
+
+The recipe can be used to calculate performance metrics at different vertical
+levels (e.g., 5, 30, 200, 850 hPa as in
+`Gleckler et al. (2008) <http://dx.doi.org/10.1029/2007JD008972>`_) and in
+different regions. As an additional reference, we consider
+`Righi et al. (2015) <https://doi.org/10.5194/gmd-8-733-2015>`_.
 
-The recipe can be used to calculate performance metrics at different vertical levels (e.g., 5, 30, 200, 850 hPa as in `Gleckler et al. (2008) <http://dx.doi.org/10.1029/2007JD008972>`_ and in different regions. As an additional reference, we consider `Righi et al. (2015) <https://doi.org/10.5194/gmd-8-733-2015>`_.
 
 Available recipes and diagnostics
 -----------------------------------
@@ -21,12 +36,19 @@ Recipes are stored in recipes/
 
 Diagnostics are stored in diag_scripts/perfmetrics/
 
-* main.ncl: calculates and (optionally) plots annual/seasonal cycles, zonal means, lat-lon fields and time-lat-lon fields. The calculated fields can also be plotted as difference w.r.t. a given reference dataset. main.ncl also calculates RMSD, bias and taylor metrics. Input data have to be regridded to a common grid in the preprocessor. Each plot type is created by a separated routine, as detailed below.
+* main.ncl: calculates and (optionally) plots annual/seasonal cycles, zonal
+  means, lat-lon fields and time-lat-lon fields. The calculated fields can also
+  be plotted as difference w.r.t. a given reference dataset. main.ncl also
+  calculates RMSD, bias and taylor metrics. Input data have to be regridded to
+  a common grid in the preprocessor. Each plot type is created by a separated
+  routine, as detailed below.
 * cycle.ncl: creates an annual/seasonal cycle plot.
 * zonal.ncl: creates a zonal (lat-pressure) plot.
 * latlon.ncl: creates a lat-lon plot.
-* cycle_latlon.ncl: precalculates the metrics for a time-lat-lon field, with different options for normalization.
-* collect.ncl: collects and plots the metrics previously calculated by cycle_latlon.ncl.
+* cycle_latlon.ncl: precalculates the metrics for a time-lat-lon field, with
+  different options for normalization.
+* collect.ncl: collects and plots the metrics previously calculated by
+  cycle_latlon.ncl.
 
 User settings in recipe
 -----------------------
@@ -37,9 +59,12 @@ User settings in recipe
 
    *Required settings (scripts)*
 
-   * plot_type: cycle (time), zonal (plev, lat), latlon (lat, lon), cycle_latlon (time, lat, lon), cycle_zonal (time, plev, lat)
+   * plot_type: cycle (time), zonal (plev, lat), latlon (lat, lon), cycle_latlon
+     (time, lat, lon), cycle_zonal (time, plev, lat)
    * time_avg: type of time average (monthlyclim, seasonalclim, annualclim)
-   * region: selected region (global, trop, nhext, shext, nhtrop, shtrop, nh, sh, nhmidlat, shmidlat, nhpolar, shpolar, eq)
+   * region: selected region (global, trop, nhext, shext, nhtrop, shtrop, nh,
+     sh, nhmidlat, shmidlat, nhpolar, shpolar, eq)
+
 
    *Optional settings (scripts)*
 
@@ -51,9 +76,12 @@ User settings in recipe
    * projection: map projection for plot_type latlon (default: CylindricalEquidistant)
    * plot_diff: draws difference plots (default: False)
    * calc_grading: calculates grading metrics (default: False)
-   * stippling: uses stippling to mark statistically significant differences (default: False = mask out non-significant differences in gray)
-   * show_global_avg: diplays the global avaerage of the input field as string at the top-right of lat-lon plots (default: False)
-   * annots: choose the annotation style, e.g. ```alias``` which would display the alias of the dataset as title (applies to plot_type zonal and cycle_zonal)
+   * stippling: uses stippling to mark statistically significant differences
+     (default: False = mask out non-significant differences in gray)
+   * show_global_avg: displays the global avaerage of the input field as string
+     at the top-right of lat-lon plots (default: False)
+   * annots: choose the annotation style, e.g. ```alias``` which would display
+     the alias of the dataset as title (applies to plot_type zonal and cycle_zonal)
    * metric: chosen grading metric(s) (if calc_grading is True)
    * normalization: metric normalization (for RMSD and BIAS metrics only)
    * abs_levs: list of contour levels for absolute plot
@@ -114,8 +142,8 @@ User settings in recipe
 
    *Optional settings (scripts)*
 
-   * label_lo: adds lower triange for values outside range
-   * label_hi: adds upper triange for values outside range
+   * label_lo: adds lower triangle for values outside range
+   * label_hi: adds upper triangle for values outside range
    * cm_interval: min and max color of the color table
    * cm_reverse: reverses the color table
    * sort: sorts datasets in alphabetic order (excluding MMM)
@@ -157,14 +185,21 @@ Variables
 Observations and reformat scripts
 ---------------------------------
 
-The following list shows the currently used observational data sets for this recipe with their variable names and the reference to their respective reformat scripts in parentheses. Please note that obs4MIPs data can be used directly without any reformating. For non-obs4MIPs data use `esmvaltool data info DATASET` or see headers of cmorization scripts (in `/esmvaltool/cmorizers/data/formatters/datasets/
-<https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/cmorizers/data/formatters/datasets/>`_) for downloading and processing instructions.
+The following list shows the currently used observational data sets for this
+recipe with their variable names and the reference to their respective reformat
+scripts in parentheses. Please note that obs4MIPs data can be used directly
+without any reformatitng. For non-obs4MIPs data use `esmvaltool data info DATASET`
+or see headers of cmorization scripts (in `/esmvaltool/cmorizers/data/formatters/datasets/
+<https://github.com/ESMValGroup/ESMValTool/blob/main/esmvaltool/cmorizers/data/formatters/datasets/>`_)
+for downloading and processing instructions.
+
 #.  recipe_perfmetrics_CMIP5.yml
 
     * AIRS (hus - obs4MIPs)
     * CERES-EBAF (rlut, rlutcs, rsut, rsutcs - obs4MIPs)
     * ERA-Interim (tas, ta, ua, va, zg, hus - esmvaltool/cmorizers/data/formatters/datasets/era-interim.py)
-    * ESACCI-AEROSOL (od550aer, od870aer, od550abs, od550lt1aer - esmvaltool/cmorizers/data/formatters/datasets/esacci-aerosol.ncl)
+    * ESACCI-AEROSOL (od550aer, od870aer, od550abs, od550lt1aer -
+      esmvaltool/cmorizers/data/formatters/datasets/esacci-aerosol.ncl)
     * ESACCI-CLOUD (clt - esmvaltool/cmorizers/data/formatters/datasets/esacci-cloud.ncl)
     * ESACCI-OZONE (toz - esmvaltool/cmorizers/data/formatters/datasets/esacci-ozone.ncl)
     * ESACCI-SOILMOISTURE (sm - esmvaltool/cmorizers/data/formatters/datasets/esacci_soilmoisture.ncl)
@@ -190,9 +225,13 @@ The following list shows the currently used observational data sets for this rec
 References
 ----------
 
-* Gleckler, P. J., K. E. Taylor, and C. Doutriaux, Performance metrics for climate models, J. Geophys. Res., 113, D06104, doi: 10.1029/2007JD008972 (2008).
+* Gleckler, P. J., K. E. Taylor, and C. Doutriaux, Performance metrics for climate models, J.
+  Geophys. Res., 113, D06104, doi: 10.1029/2007JD008972 (2008).
+
+* Righi, M., Eyring, V., Klinger, C., Frank, F., Gottschaldt, K.-D., Jöckel, P.,
+  and Cionni, I.: Quantitative evaluation of ozone and selected climate parameters in a set of EMAC simulations,
+  Geosci. Model Dev., 8, 733, doi: 10.5194/gmd-8-733-2015 (2015).
 
-* Righi, M., Eyring, V., Klinger, C., Frank, F., Gottschaldt, K.-D., Jöckel, P., and Cionni, I.: Quantitative evaluation of oone and selected climate parameters in a set of EMAC simulations, Geosci. Model Dev., 8, 733, doi: 10.5194/gmd-8-733-2015 (2015).
 
 Example plots
 -------------
@@ -200,17 +239,24 @@ Example plots
 .. figure:: /recipes/figures/perfmetrics/perfmetrics_fig_1.png
    :width: 90%
 
-   Annual cycle of globally averaged temperature at 850 hPa (time period 1980-2005) for different CMIP5 models (historical simulation) (thin colored lines) in comparison to ERA-Interim (thick yellow line) and NCEP-NCAR-R1 (thick black dashed line) reanalysis data.
+   Annual cycle of globally averaged temperature at 850 hPa (time period 1980-2005)
+   for different CMIP5 models (historical simulation) (thin colored lines) in comparison to
+   ERA-Interim (thick yellow line) and NCEP-NCAR-R1 (thick black dashed line) reanalysis data.
 
 .. figure:: /recipes/figures/perfmetrics/perfmetrics_fig_2.png
    :width: 90%
 
-   Taylor diagram of globally averaged temperature at 850 hPa (ta) and longwave cloud radiative effect (lwcre) for different CMIP5 models (historical simulation, 1980-2005). Reference data (REF) are ERA-Interim for temperature (1980-2005) and CERES-EBAF (2001-2012) for longwave cloud radiative effect.
+   Taylor diagram of globally averaged temperature at 850 hPa (ta) and longwave cloud
+   radiative effect (lwcre) for different CMIP5 models (historical simulation, 1980-2005).
+   Reference data (REF) are ERA-Interim for temperature (1980-2005) and CERES-EBAF (2001-2012)
+   for longwave cloud radiative effect.
 
 .. figure:: /recipes/figures/perfmetrics/perfmetrics_fig_3.png
    :width: 90%
 
-   Difference in annual mean of zonally averaged temperature (time period 1980-2005) between the CMIP5 model MPI-ESM-MR (historical simulation) and ERA-Interim. Stippled areas indicdate differences that are statistically significant at a 95% confidence level.
+   Difference in annual mean of zonally averaged temperature (time period 1980-2005) between the
+   CMIP5 model MPI-ESM-MR (historical simulation) and ERA-Interim. Stippled areas indicdate
+   differences that are statistically significant at a 95% confidence level.
 
 .. figure:: /recipes/figures/perfmetrics/perfmetrics_fig_4.png
    :width: 90%
@@ -221,4 +267,9 @@ Example plots
    :width: 90%
    :align: center
 
-   Relative space-time root-mean-square deviation (RMSD) calculated from the climatological seasonal cycle of CMIP5 simulations. A relative performance is displayed, with blue shading indicating better and red shading indicating worse performance than the median of all model results. A diagonal split of a grid square shows the relative error with respect to the reference data set (lower right triangle) and the alternative data set (upper left triangle). White boxes are used when data are not available for a given model and variable.
+   Relative space-time root-mean-square deviation (RMSD) calculated from the climatological
+   seasonal cycle of CMIP5 simulations. A relative performance is displayed, with blue shading
+   indicating better and red shading indicating worse performance than the median of all model results.
+   A diagonal split of a grid square shows the relative error with respect to the reference data set
+   (lower right triangle) and the alternative data set (upper left triangle).
+   White boxes are used when data are not available for a given model and variable.
diff --git a/doc/sphinx/source/recipes/recipe_portrait.rst b/doc/sphinx/source/recipes/recipe_portrait.rst
new file mode 100644
index 0000000000..38fb5b4489
--- /dev/null
+++ b/doc/sphinx/source/recipes/recipe_portrait.rst
@@ -0,0 +1,95 @@
+.. _recipe_portrait:
+
+Portrait plot
+=============
+
+
+Overview
+--------
+Portrait plots are a flexible way to visualize performance metrics for multiple
+datasets and up to four references. In this recipe ``recipe_portrait_CMIP.yml``
+the normalized Root Mean Squared Deviation (RMSD) of global mean seasonal
+climatologies is calculated for a selection of CMIP models.
+In the example recipe, for each variable up to two observation based datasets
+are used as reference.
+See :ref:`variables` for complete list of references.
+The recipe uses preprocessor functions (distance metrics, global mean,
+climate statistics) to calculate a scalar metric for each combination of
+dataset, variable and reference, which is plotted by the ``portrait_plot.py``
+diagnostic script.
+
+
+User settings in recipe
+-----------------------
+
+By default cells are plotted for combinations of ``short_name``,
+``dataset``, ``project`` and ``split``,
+where ``split`` is an optional extra_facet for variables.
+However, this can be customized using the ``x_by``,
+``y_by``, ``group_by`` and ``split_by`` script settings.
+For a complete and detailed list of settings, see the
+:doc:`diagnostic documentation </api/esmvaltool.diag_scripts.portrait_plot>`.
+While this allows very flexible use for any kind of data, there are some
+limitations as well: The grouping (subplots) and normalization is always
+applied along the x-axis.
+With default settings this means normalizing all metrics for each variable
+and grouping all datasets by project.
+
+To plot distance metrics like RMSE, pearson R, bias etc. the
+:func:`distance_metric <esmvalcore.preprocessor.distance_metric>` preprocessor
+or custom diagnostics can be used.
+
+
+
+.. _variables:
+
+Variables and Datasets
+------------------------
+
+.. note::
+
+   The recipe generally works for any variable that is preprocessed correctly.
+   To use different preprocessors or reference datasets it could be useful
+   to create different variable groups and link them with the same extra_facet
+   like ``variable_name``. See recipe for examples. Listed below are the variables
+   used to produce the example figure.
+
+
+The following list shows which observational dataset is used as reference for
+each variable in this recipe. All variables are atmospheric monthly means.
+For 3D variables the selected pressure level is specified in parentheses.
+
+* clt (Ref1: ESACCI-CLOUD, Ref2: PATMOS-x)
+* pr (Ref1: GPCP-V2.2)
+* rlut, rsut (Ref1: CERES-EBAF)
+* tas (Ref1: ERA-Interim, Ref2: NCEP-NCAR-R1)
+* ts (Ref1: ESACCI-SST, Ref2: HadISST)
+* ua (200 hPa, Ref1: ERA-Interim, Ref2: NCEP-NCAR-R1)
+* zg (500 hPa, Ref1: ERA-Interim, Ref2: NCEP-NCAR-R1)
+
+
+References
+----------
+
+* Gleckler, P. J., K. E. Taylor, and C. Doutriaux, Performance metrics for climate models, J.
+  Geophys. Res., 113, D06104, doi: 10.1029/2007JD008972 (2008).
+
+* Righi, M., Eyring, V., Klinger, C., Frank, F., Gottschaldt, K.-D., Jöckel, P.,
+  and Cionni, I.: Quantitative evaluation of ozone and selected climate parameters in a set of EMAC simulations,
+  Geosci. Model Dev., 8, 733, doi: 10.5194/gmd-8-733-2015 (2015).
+
+
+Example plots
+-------------
+
+.. _fig_portrait_plot:
+
+.. figure:: /recipes/figures/portrait/portrait_plot.png
+   :width: 90%
+   :align: center
+
+
+   Relative space-time root-mean-square deviation (RMSD) calculated from the climatological
+   seasonal cycle of CMIP5 and CMIP6 simulations. A relative performance is displayed, with blue shading
+   indicating better and red shading indicating worse performance than the median of all model results.
+   A diagonal split of a grid square shows the relative error with respect to the reference data set.
diff --git a/esmvaltool/config-references.yml b/esmvaltool/config-references.yml
index 114ffea267..1ece453343 100644
--- a/esmvaltool/config-references.yml
+++ b/esmvaltool/config-references.yml
@@ -24,6 +24,12 @@ authors:
     institute: DLR, Germany
     email: bjoern.broetz@dlr.de
     orcid:
+  cammarano_diego:
+    name: Cammarano, Diego
+    institute: DLR, Germany
+    email: diego.cammarano@dlr.de
+    github: diegokam
+    orcid:
   debeire_kevin:
     name: Debeire, Kevin
     institute: DLR, Germany
@@ -241,7 +247,7 @@ authors:
     name: Gillett, Nathan
     institute: CCCma, ECCC, Canada
     orcid: https://orcid.org/0000-0002-2957-0002
-    github: npgillett    
+    github: npgillett
   gonzalez-reviriego_nube:
     name: Gonzalez-Reviriego, Nube
     institute: BSC, Spain
diff --git a/esmvaltool/diag_scripts/portrait_plot.py b/esmvaltool/diag_scripts/portrait_plot.py
new file mode 100644
index 0000000000..d8a19b52a9
--- /dev/null
+++ b/esmvaltool/diag_scripts/portrait_plot.py
@@ -0,0 +1,519 @@
+"""Portrait Plot Diagnostic.
+
+Plot performance metrics of multiple datasets vs up to four references
+A :doc:`documented example recipe </recipes/recipe_portrait>` to use this
+diagnostic is provided as ``recipes/recipe_portrait_CMIP.yml``.
+
+Description
+-----------
+This diagnostic provides plot functionalities for performance metrics,
+and is written to be as flexible as possible to be adaptable to further use
+cases. X and Y axis, grouping parameter and splits for each rectangle can be
+configured in the recipe. All ``*_by`` parameters can be set to any metadata
+key. To split by 'reference' this key needs to be set as extra_facet in recipe.
+
+Authors
+-------
+- Lukas Lindenlaub (Universität Bremen, Germany)
+- Diego Cammarano
+
+Configuration parameters through recipe:
+----------------------------------------
+axes_properties: dict, optional
+    Dictionary that gets passed to :meth:`matplotlib.axes.Axes.set`.
+    Subplots can be widely customized.
+    E.g. xlabel, ylabel, yticklabels, xmargin...
+    By default {}.
+cbar_kwargs: dict, optional
+    Dictionary that gets passed to :meth:`matplotlib.pyplot.colorbar`.
+    E.g. label, ticks...
+    By default {}.
+default_split: str, optional
+    Data labeled with this string, will be used as main rectangles. All other
+    splits will be plotted as overlays. This can be used to choose the base
+    reference, while all references are labeled for the legend. If None, the
+    first split will be used as default.
+    By default None.
+dpi: int, optional
+    Dots per inch for the figure. By default 300.
+domain: str, optional
+    Domain for provenance. By default 'global'.
+figsize: tuple of float, optional
+    [width, height] of the figure in inches. The final figure will be saved
+    with bbox_inches="tight", which can change the resulting aspect ratio.
+    By default [7.5, 3.5].
+group_by: str, optional
+    Split portrait groups into multiple groups (one matrix per group).
+    By default 'project'.
+legend: dict, optional
+    Customize, if, how and where the legend is plotted. The 'best' position
+    and size of the legend depends on multiple parameters of the figure
+    (i.e. lengths of labels, aspect ratio of the plots...). Might require
+    manual adjustment of ``x``, ``y`` and ``size`` to fit the figure layout.
+    Keys (each optional) that will be handled are:
+
+    position: str or None, optional
+        Position of the legend. Can be 'right' or 'left'.
+        Or set to None to disable plotting the legend. By default 'right'.
+    size: float, optional
+        Size of the legend in Inches. By default 0.3.
+    x_offset: float, optional
+        Manually adjust horizontal position to save space or fix overlap.
+        Number given in Inches. By default 0.
+    y_offset: float, optional
+        Manually adjust vertical position to save space or fix overlap.
+        Number given in Inches. By default 0.
+
+matplotlib_rc_params: dict, optional
+    Optional :class:`matplotlib.RcParams` used to customize matplotlib plots.
+    Options given here will be passed to :func:`matplotlib.rc_context` and used
+    for all plots produced with this diagnostic. Note: fontsizes specified here
+    might be overwritten by the plot-type-specific option ``fontsize`` (see
+    below).
+nan_color: str or None, optional
+    Matplotlib named color or hexcode for NaN values. If set to None,
+    no triangles are plotted for NaN values.
+    By default 'white'.
+normalize: str or None, optional
+    ('mean', 'median', 'centered_mean', 'centered_median', None).
+    Divide by median or mean if not None. Subtract median/mean afterwards if
+    centered.
+    By default 'centered_median'.
+plot_kwargs: dict, optional
+    Dictionary that gets passed as kwargs to
+    :meth:`matplotlib.axes.Axes.imshow`. Colormaps will be converted to 11
+    discrete steps automatically.
+    Default colormap is ``cmap='RdYlBu_r'`` with limits ``vmin=-0.5`` and
+    ``vmax=0.5``.
+plot_legend: bool, optional
+    If True, a legend is plotted, when multiple splits are given.
+    By default True.
+split_by: str, optional
+    The rectangles can be split into 2-4 triangles. This is used to show
+    metrics for different references. For this case there is no need to change
+    this parameter. Multiple variables can be set in the recipe with ``split``
+    assigned as extra_facet to label the different references. Data without
+    a split assigned will be plotted as main rectangles, this can be changed
+    by setting default_split parameter.
+    By default 'split'.
+x_by: str, optional
+    Metadata key for x coordinate.
+    By default 'alias'.
+y_by: str, optional
+    Metadata key for y coordinate.
+    By default 'variable_group'.
+
+"""
+
+import itertools
+import logging
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+from matplotlib import patches
+from mpl_toolkits.axes_grid1 import ImageGrid
+
+from esmvaltool.diag_scripts.shared import (
+    ProvenanceLogger,
+    get_diagnostic_filename,
+    get_plot_filename,
+    group_metadata,
+    run_diagnostic,
+    select_metadata,
+)
+
+log = logging.getLogger(__name__)
+
+
+def get_provenance(cfg):
+    """Return provenance for this diagnostic."""
+    return {
+        'ancestors': list(cfg["input_data"].keys()),
+        'authors': ["lindenlaub_lukas", "cammarano_diego"],
+        'caption': 'RMSE performance metric',
+        'domains': [cfg["domain"]],
+        'plot_types': ['portrait'],
+        'references': [
+            'gleckler08jgr',
+        ],
+        'statistics': ['rmsd'],
+    }
+
+
+def unify_limits(grid):
+    """Ensure same limits for all subplots."""
+    vmin, vmax = np.inf, -np.inf
+    images = [ax.get_images()[0] for ax in grid]
+    for img in images:
+        vmin = min(vmin, img.get_clim()[0])
+        vmax = max(vmax, img.get_clim()[1])
+    for img in images:
+        img.set_clim(vmin, vmax)
+
+
+def plot_matrix(data, row_labels, col_labels, axe, plot_kwargs):
+    """Create an image for given data."""
+    img = axe.imshow(data, **plot_kwargs)
+    # Show all ticks and label them with the respective list entries.
+    axe.set_xticks(np.arange(data.shape[1]), labels=col_labels)
+    axe.set_yticks(np.arange(data.shape[0]), labels=row_labels)
+    # Rotate the tick labels and set their alignment.
+    plt.setp(
+        axe.get_xticklabels(),
+        rotation=90,
+        ha="right",
+        va="center",
+        rotation_mode="anchor",
+    )
+    axe.set_xticks(np.arange(data.shape[1] + 1) - 0.5, minor=True)
+    axe.set_yticks(np.arange(data.shape[0] + 1) - 0.5, minor=True)
+    axe.grid(which="minor", color="black", linestyle="-", linewidth=0.8)
+    axe.tick_params(which="both", bottom=False, left=False)
+    return img
+
+
+def remove_reference(metas):
+    """Remove reference for metric from list of metadata."""
+    for meta in list(metas):  # list() creates a copy to allow remove in place
+        if meta.get("reference_for_metric", False):
+            metas.remove(meta)
+
+
+def add_missing_facets(cfg, metas):
+    """Ensure that all facets are present in metadata."""
+    for meta in metas:
+        facet_config = ["x_by", "y_by", "group_by", "split_by"]
+        facets = [cfg[key] for key in facet_config]
+        for facet in facets:
+            meta.setdefault(facet, "unknown")
+
+
+def open_file(metadata, **selection):
+    """Try to find a single file for selection and return data.
+
+    If multiple files are found, raise an error. If no file is found,
+    return np.nan.
+    """
+    metas = select_metadata(metadata, **selection)
+    if len(metas) > 1:
+        raise ValueError(f"Multiple files found for {selection}")
+    if len(metas) < 1:
+        log.debug("No files found for %s", selection)
+        return np.nan
+    log.debug("File found for %s", selection)
+    das = xr.open_dataset(metas[0]["filename"])
+    varname = list(das.data_vars.keys())[0]
+    try:
+        return das[varname].values.item()
+    except ValueError as exc:
+        msg = f"Expected scalar in input file {metas[0]['filename']}."
+        raise ValueError(msg) from exc
+
+
+def load_data(cfg, metas):
+    """Load all netcdf files from metadata into xarray dataset.
+
+    The dataset contains all relevant information for the plot. Coord
+    names are metadata keys, ordered as x, y, group, split. The default
+    reference is None, or if all references are named the first from the
+    list.
+    """
+    coords = {  # order matters: x, y, group, split
+        cfg["x_by"]: list(group_metadata(metas, cfg["x_by"]).keys()),
+        cfg["y_by"]: list(group_metadata(metas, cfg["y_by"]).keys()),
+        cfg["group_by"]: list(group_metadata(metas, cfg["group_by"]).keys()),
+        cfg["split_by"]: list(group_metadata(metas, cfg["split_by"]).keys()),
+    }
+    shape = [len(coord) for coord in coords.values()]
+    var_data = xr.DataArray(np.full(shape, np.nan), dims=list(coords.keys()))
+    data = xr.Dataset({"var": var_data}, coords=coords)
+    # loop over each cell (coord combination) and load data if existing
+    for coord_tuple in itertools.product(*coords.values()):
+        selection = dict(zip(coords.keys(), coord_tuple))
+        data['var'].loc[selection] = open_file(metas, **selection)
+    if cfg["default_split"] is None:
+        cfg["default_split"] = data.coords[cfg["split_by"]].values[0]
+    log.debug("Using %s as default split", cfg["default_split"])
+    log.debug("Loaded Data: %s", data)
+    return data
+
+
+def split_legend(cfg, grid, data):
+    """Create legend for references, based on split coordinate in the dataset.
+
+    Mpl handles axes positions in relative figure coordinates. To anchor the
+    legend to the origin of the first graph (bottom left) with fixed size,
+    without messing up the layout for changing figure sizes, a few extra steps
+    are required.
+    NOTE: maybe ``mpl_toolkits.axes_grid1.axes_divider.AxesDivider`` simplifies
+    this a bit by using ``append_axes``.
+    """
+    grid[0].get_figure().canvas.draw()  # set axes position in figure
+    size = cfg["legend"]["size"]  # rect width in physical size (inch)
+    fig_size = grid[0].get_figure().get_size_inches()  # physical figure size
+    ax_size = (size / fig_size[0], size / fig_size[1])  # legend (fig coords)
+    gaps = [0.3 / fig_size[0], 0.3 / fig_size[1]]  # margins (fig coords)
+    # anchor legend on origin of first plot or colorbar
+    anchor = grid[0].get_position().bounds  # relative figure coordinates
+    if cfg["legend"]["position"] == "right":
+        cbar_x = grid.cbar_axes[0].get_position().bounds[0]
+        gaps[0] *= 0.8  # compensate colorbar padding
+        anchor = (cbar_x + gaps[0] + cfg["legend"]["x_offset"],
+                  anchor[1] - gaps[1] - ax_size[1] + cfg["legend"]["y_offset"])
+    else:
+        anchor = (anchor[0] - gaps[0] - ax_size[0] + cfg["legend"]["x_offset"],
+                  anchor[1] - gaps[1] - ax_size[1] + cfg["legend"]["y_offset"])
+    # create legend as empty imshow like axes in figure coordinates
+    axes = {"main": grid[0].get_figure().add_axes([*anchor, *ax_size])}
+    axes["main"].imshow(np.zeros((1, 1)))  # same axes properties as main plot
+    axes["main"].set_xticks([])
+    axes["main"].set_yticks([])
+    axes["twiny"], axes["twinx"] = [axes["main"].twiny(), axes["main"].twinx()]
+    axes["twinx"].set_yticks([])
+    axes["twiny"].set_xticks([])
+    label_at = [  # order matches get_triangle_nodes (halves and quarters)
+        axes["main"].set_ylabel,  # left
+        axes["twinx"].set_ylabel,  # right
+        axes["main"].set_xlabel,  # bottom
+        axes["twiny"].set_xlabel,  # top
+    ]
+    for i, label in enumerate(data.coords[cfg["split_by"]].values):
+        nodes = get_triangle_nodes(i, len(data.coords[cfg["split_by"]].values))
+        axes["main"].add_patch(
+            patches.Polygon(nodes,
+                            closed=True,
+                            facecolor=["#bbb", "#ccc", "#ddd", "#eee"][i],
+                            edgecolor="black",
+                            linewidth=0.5,
+                            fill=True))
+        label_at[i](label)
+
+
+def overlay_reference(cfg, axe, data, triangle):
+    """Create triangular overlays for given data and axes."""
+    # use same colors as in main plot
+    cmap = axe.get_images()[0].get_cmap()
+    norm = axe.get_images()[0].norm
+    if cfg["nan_color"] is not None:
+        cmap.set_bad(cfg["nan_color"])
+    for i, j in itertools.product(*map(range, data.shape)):
+        if np.isnan(data[i, j]) and cfg["nan_color"] is None:
+            continue
+        color = cmap(norm(data[i, j]))
+        edges = [(e[0] + j, e[1] + i) for e in triangle]
+        patch = patches.Polygon(
+            edges,
+            closed=True,
+            facecolor=color,
+            edgecolor="black",
+            linewidth=0.5,
+            fill=True,
+        )
+        axe.add_patch(patch)
+
+
+def plot_group(cfg, axe, data, title=None):
+    """Create matrix for one subplot in ax using plt.imshow.
+
+    By default split None is used, if all splits are named the first is
+    used. Other splits will be added by overlaying triangles.
+    """
+    split = data.sel({cfg["split_by"]: cfg["default_split"]})
+    plot_matrix(
+        split.values.T,  # 2d numpy array
+        split.coords[cfg["y_by"]].values,  # y_labels
+        split.coords[cfg["x_by"]].values,  # x_labels
+        axe,
+        cfg["plot_kwargs"],
+    )
+    if title is not None:
+        axe.set_title(title)
+    axe.set(**cfg["axes_properties"])
+
+
+def get_triangle_nodes(position, total_count=2):
+    """Return list of nodes with relative x, y coordinates.
+
+    The nodes of the triangle are given as list of three tuples. Each tuple
+    contains relative coordinates (-0.5 to +0.5). For total of <= 2 a top left
+    (position=0) and bottom right (position=1) rectangle is returned.
+    For higher counts (3 or 4) one quartile is returned for each position.
+    NOTE: Order matters. Ensure axis labels for the legend match when changing.
+    """
+    if total_count < 3:
+        halves = [
+            [(0.5, -0.5), (-0.5, -0.5), (-0.5, 0.5)],  # top left
+            [(0.5, -0.5), (0.5, 0.5), (-0.5, 0.5)],  # bottom right
+        ]
+        return halves[position]
+    quarters = [
+        [(-0.5, -0.5), (0, 0), (-0.5, 0.5)],  # left
+        [(0.5, -0.5), (0, 0), (0.5, 0.5)],  # right
+        [(-0.5, 0.5), (0, 0), (0.5, 0.5)],  # bottom
+        [(-0.5, -0.5), (0, 0), (0.5, -0.5)],  # top
+    ]
+    return quarters[position]
+
+
+def plot_overlays(cfg, grid, data):
+    """Call overlay_reference for each split in data and each group in grid."""
+    split_count = data.shape[3]
+    group_count = data.shape[2]
+    for i in range(group_count):
+        if split_count < 2:
+            log.debug("No additional splits for overlay.")
+            break
+        if split_count > 4:
+            log.warning("Too many splits for overlay, only 3 will be plotted.")
+        group_data = data.isel({cfg["group_by"]: i})
+        group_data = group_data.dropna(cfg["x_by"], how="all")
+        for sss in range(split_count):
+            split = group_data.isel({cfg["split_by"]: sss})
+            split_label = split.coords[cfg["split_by"]].values.item()
+            if split_label == cfg["default_split"]:
+                log.debug("Skipping default split for overlay.")
+                continue
+            nodes = get_triangle_nodes(sss, split_count)
+            overlay_reference(cfg, grid[i], split.values.T, nodes)
+
+
+def plot(cfg, data):
+    """Create figure with subplots for each group and save to NetCDF.
+
+    Sets same color range and overlays additional references based on
+    the content of data (xr.DataArray).
+    """
+    fig = plt.figure(1, cfg["figsize"])
+    group_count = len(data.coords[cfg["group_by"]])
+    grid = ImageGrid(
+        fig,
+        111,  # similar to subplot(111)
+        cbar_mode="single",
+        cbar_location="right",
+        cbar_pad=0.1,
+        cbar_size=0.2,
+        nrows_ncols=(1, group_count),
+        axes_pad=0.1,
+    )
+    # remap colorbar to 10 discrete steps
+    cmap = mpl.cm.get_cmap(cfg["plot_kwargs"]["cmap"], 10)
+    cfg["plot_kwargs"]["cmap"] = cmap
+    for i in range(group_count):
+        group = data.isel({cfg["group_by"]: i})
+        group = group.dropna(cfg["x_by"], how="all")
+        title = None
+        if group_count > 1:
+            title = group.coords[cfg["group_by"]].values.item()
+        plot_group(cfg, grid[i], group, title=title)
+    # use same colorrange and colorbar for all subplots:
+    unify_limits(grid)
+    # set cb of first image as single cb for the figure
+    grid.cbar_axes[0].colorbar(grid[0].get_images()[0], **cfg["cbar_kwargs"])
+    if data.shape[3] > 1:
+        plot_overlays(cfg, grid, data)
+    if cfg["plot_legend"] and data.shape[3] > 1:
+        split_legend(cfg, grid, data)
+    basename = "portrait_plot"
+    fname = get_plot_filename(basename, cfg)
+    plt.savefig(fname, bbox_inches="tight", dpi=cfg["dpi"])
+    with ProvenanceLogger(cfg) as prov_logger:
+        prov_logger.log(fname, get_provenance(cfg))
+    log.info("Figure saved:")
+    log.info(fname)
+
+
+def normalize(array, method, dims):
+    """Divide and shift values along dims depending on method."""
+    shift = 0
+    norm = 1
+    if "mean" in method:
+        norm = array.mean(dim=dims)
+    elif "median" in method:
+        norm = array.median(dim=dims)
+    if "centered" in method:
+        shift = norm
+    normalized = (array - shift) / norm
+    return normalized
+
+
+def set_defaults(cfg):
+    """Set default values for most important config parameters."""
+    cfg.setdefault("axes_properties", {})
+    cfg.setdefault("cbar_kwargs", {})
+    cfg.setdefault("default_split", None)
+    cfg.setdefault("dpi", 300)
+    cfg.setdefault("domain", "global")
+    cfg.setdefault("figsize", (7.5, 3.5))
+    cfg.setdefault("group_by", "project")
+    cfg.setdefault("legend", {})
+    cfg["legend"].setdefault("position", "right")
+    cfg["legend"].setdefault("size", 0.3)
+    cfg["legend"].setdefault("x_offset", 0)
+    cfg["legend"].setdefault("y_offset", 0)
+    cfg.setdefault("matplotlib_rc_params", {})
+    cfg.setdefault("nan_color", 'white')
+    cfg.setdefault("normalize", "centered_median")
+    cfg.setdefault("plot_kwargs", {})
+    cfg["plot_kwargs"].setdefault("cmap", "RdYlBu_r")
+    cfg["plot_kwargs"].setdefault("vmin", -0.5)
+    cfg["plot_kwargs"].setdefault("vmax", 0.5)
+    cfg.setdefault("plot_legend", True)
+    cfg.setdefault("split_by", "split")  # extra facet
+    cfg.setdefault("x_by", "alias")
+    cfg.setdefault("y_by", "variable_group")
+
+
+def sort_data(cfg, dataset):
+    """Sort the dataset along by custom or alphabetical order."""
+    dataset = dataset.sortby([
+        dataset[cfg["x_by"]].str.lower(), dataset[cfg["y_by"]].str.lower(),
+        dataset[cfg["group_by"]].str.lower(),
+        dataset[cfg["split_by"]].str.lower()
+    ])
+    if cfg["x_by"] in ["alias", "dataset"]:
+        # NOTE: not clean, but it works for many cases
+        mm_stats = [
+            v for v in dataset[cfg["x_by"]].values
+            if "Mean" in v or "Median" in v or "Percentile" in v
+        ]
+        others = [
+            v for v in dataset[cfg["x_by"]].values
+            if "Mean" not in v and "Median" not in v and "Percentile" not in v
+        ]
+        new_order = mm_stats + others
+        dataset = dataset.reindex({cfg["x_by"]: new_order})
+    return dataset
+
+
+def save_to_netcdf(cfg, data):
+    """Save the final dataset to a NetCDF file."""
+    basename = "portrait"
+    fname = get_diagnostic_filename(basename, cfg, extension='nc')
+    data.to_netcdf(fname)
+    log.info("NetCDF file saved:")
+    log.info(fname)
+    with ProvenanceLogger(cfg) as prov_logger:
+        prov_logger.log(fname, get_provenance(cfg))
+
+
+def main(cfg):
+    """Run the diagnostic."""
+    set_defaults(cfg)
+    metas = list(cfg["input_data"].values())
+    remove_reference(metas)
+    add_missing_facets(cfg, metas)
+    dataset = load_data(cfg, metas)
+    dataset = sort_data(cfg, dataset)
+    if cfg["normalize"] is not None:
+        dataset["var"] = normalize(dataset["var"], cfg["normalize"],
+                                   [cfg["x_by"], cfg["group_by"]])
+    with mpl.rc_context(cfg['matplotlib_rc_params']):
+        plot(cfg, dataset["var"])
+    save_to_netcdf(cfg, dataset["var"])
+
+
+if __name__ == '__main__':
+    with run_diagnostic() as config:
+        main(config)
diff --git a/esmvaltool/recipes/recipe_portrait_CMIP.yml b/esmvaltool/recipes/recipe_portrait_CMIP.yml
new file mode 100644
index 0000000000..b09eb1ec72
--- /dev/null
+++ b/esmvaltool/recipes/recipe_portrait_CMIP.yml
@@ -0,0 +1,213 @@
+# ESMValTool
+---
+documentation:
+  title: Performance metrics plots.
+  description: >
+    Compare performance of CMIP simulations to a reference dataset.
+  authors:
+    - cammarano_diego
+    - lindenlaub_lukas
+  maintainer:
+    - lindenlaub_lukas
+  references:
+    - eyring21ipcc
+    - gleckler08jgr
+
+cmip5: &CMIP5
+  project: CMIP5
+  ensemble: r1i1p1
+
+datasets:
+  # cmip5
+  - {<<: *CMIP5, dataset: ACCESS1-0}
+  - {<<: *CMIP5, dataset: CESM1-BGC}
+  - {<<: *CMIP5, dataset: CNRM-CM5}
+  - {<<: *CMIP5, dataset: GFDL-ESM2M}
+  - {<<: *CMIP5, dataset: HadGEM2-CC}
+  - {<<: *CMIP5, dataset: IPSL-CM5B-LR}
+  - {<<: *CMIP5, dataset: MIROC-ESM}
+  - {<<: *CMIP5, dataset: MPI-ESM-LR}
+  - {<<: *CMIP5, dataset: MRI-CGCM3}
+  # cmip6
+  - {dataset: ACCESS-ESM1-5, institute: CSIRO}
+  - {dataset: CESM2, institute: NCAR}
+  - {dataset: CNRM-CM6-1, ensemble: r1i1p1f2, grid: gr}
+  - {dataset: GFDL-CM4, grid: gr1}
+  - {dataset: MIROC-ES2L, ensemble: r1i1p1f2}
+  - {dataset: MPI-ESM1-2-LR}
+  - {dataset: MRI-ESM2-0}
+  - {dataset: UKESM1-0-LL, ensemble: r1i1p1f2}
+
+preprocessors:
+  default: &default  # common preprocessor settings
+    regrid:
+      target_grid: 3x3
+      scheme: linear
+    distance_metric:
+      metric: weighted_rmse
+    climate_statistics:
+      operator: mean
+      period: month
+    mask_fillvalues:
+      threshold_fraction: 0.95
+    multi_model_statistics:
+      span: overlap
+      statistics:
+        - operator: mean
+        - operator: percentile
+          percent: 50
+      groupby: ['project']
+      # exclude all possible reference datasets
+      exclude: [
+        AIRS-2-1,
+        CERES-EBAF,
+        ERA-Interim,
+        ESACCI-AEROSOL,
+        ESACCI-CLOUD,
+        ESACCI-OZONE,
+        ESACCI-SOILMOISTURE,
+        ESACCI-SST,
+        GPCP-SG,
+        HadISST,
+        NCEP-NCAR-R1,
+        MODIS,
+        NIWA-BS,
+        PATMOS-x]
+  pp200:  # only add/overwrite var specific settings
+    <<: *default
+    extract_levels:
+      levels: 20000
+      scheme: linear
+      coordinate: air_pressure
+  pp500:  # only add/overwrite var specific settings
+    <<: *default
+    extract_levels:
+      levels: 50000
+      scheme: linear
+      coordinate: air_pressure
+  thr10:  # only add/overwrite var specific settings
+    <<: *default
+    mask_fillvalues:
+      threshold_fraction: 0.10
+
+var_default: &var_default
+  mip: Amon
+  project: CMIP6
+  exp: historical
+  ensemble: r1i1p1f1
+  preprocessor: default
+  grid: gn
+  start_year: 2000
+  end_year: 2002
+  split: Ref1  # first triangle
+
+diagnostics:
+  portrait_rmse:
+    themes: [aerosols, phys, clouds, atmDyn, chem, ghg]
+    realms: [atmos, land, atmosChem, ocean]
+    variables:
+      zg: &zg
+        <<: *var_default
+        short_name: zg
+        variable: zg500
+        preprocessor: pp500
+        additional_datasets:
+          - {dataset: ERA-Interim, project: OBS6, type: reanaly,
+             version: 1, tier: 3, reference_for_metric: true}
+      zg_2:
+        <<: *zg
+        split: Ref2
+        additional_datasets:
+          - {dataset: NCEP-NCAR-R1, project: OBS6, type: reanaly,
+             version: 1, tier: 2, reference_for_metric: true}
+      clt: &clt
+        <<: *var_default
+        short_name: clt
+        variable: clt
+        additional_datasets:
+          - {dataset: ESACCI-CLOUD, project: OBS, type: sat,
+             version: AVHRR-AMPM-fv3.0, tier: 2, reference_for_metric: true}
+      clt_2:
+        <<: *clt
+        split: Ref2
+        additional_datasets:
+          - {dataset: PATMOS-x, project: OBS, type: sat, version: NOAA, tier: 2, reference_for_metric: true}
+      ts: &ts
+        <<: *var_default
+        short_name: ts
+        variable: ts
+        preprocessor: default
+        additional_datasets:
+          - {dataset: ESACCI-SST, project: OBS, type: sat, version: 2.2, tier: 2, reference_for_metric: true}
+      ts_2:
+        <<: *ts
+        split: Ref2
+        additional_datasets:
+          - {dataset: HadISST, project: OBS, type: reanaly, version: 1, tier: 2, reference_for_metric: true}
+      tas: &tas
+        <<: *var_default
+        short_name: tas
+        variable: tas
+        additional_datasets:
+          - {dataset: ERA-Interim, project: OBS6, type: reanaly,
+             version: 1, tier: 3, reference_for_metric: true}
+      tas_2:
+        <<: *tas
+        split: Ref2
+        additional_datasets:
+          - {dataset: NCEP-NCAR-R1, project: OBS6, type: reanaly,
+             version: 1, tier: 2, reference_for_metric: true}
+      pr:
+        <<: *var_default
+        variable: pr
+        split: Ref1
+        additional_datasets:
+          - {dataset: GPCP-SG, project: OBS, type: atmos,
+             version: 2.3, tier: 2, reference_for_metric: true}
+      rlut:
+        <<: *var_default
+        variable: rlut
+        start_year: 2001
+        end_year: 2003
+        additional_datasets:
+          - {dataset: CERES-EBAF, project: OBS, type: sat, version: Ed4.2,
+             tier: 2, reference_for_metric: true}
+      rsut:
+        <<: *var_default
+        variable: rsut
+        start_year: 2001
+        end_year: 2003
+        additional_datasets:
+          - {dataset: CERES-EBAF, project: OBS, type: sat, version: Ed4.2,
+             tier: 2, reference_for_metric: true}
+      ua200: &ua200
+        <<: *var_default
+        variable: ua200
+        short_name: ua
+        preprocessor: pp200
+        split: Ref1
+        additional_datasets:
+          - {dataset: ERA-Interim, project: OBS6, type: reanaly,
+             version: 1, tier: 3, reference_for_metric: true}
+      ua200_2:
+        <<: *ua200
+        split: Ref2
+        additional_datasets:
+          - {dataset: NCEP-NCAR-R1, project: OBS6, type: reanaly,
+             version: 1, tier: 2, reference_for_metric: true}
+
+    scripts:
+      portrait:
+        script: portrait_plot.py
+        x_by: dataset
+        y_by: variable  # extra_facet
+        group_by: project
+        normalize: "centered_median"
+        default_split: Ref1
+        nan_color: null
+        plot_kwargs:
+          vmin: -0.5
+          vmax: +0.5
+        cbar_kwargs:
+          label: Relative RMSE
+          extend: both
diff --git a/esmvaltool/recipes/testing/recipe_portrait_CMIP_fast.yml b/esmvaltool/recipes/testing/recipe_portrait_CMIP_fast.yml
new file mode 100644
index 0000000000..002df5b4a8
--- /dev/null
+++ b/esmvaltool/recipes/testing/recipe_portrait_CMIP_fast.yml
@@ -0,0 +1,160 @@
+# ESMValTool
+---
+documentation:
+  title: Performance metrics plots.
+  description: >
+    Test recipe for the performance comparison of CMIP simulations to a reference dataset.
+  authors:
+    - lindenlaub_lukas
+  maintainer:
+    - lindenlaub_lukas
+  references:
+    - eyring21ipcc
+    - gleckler08jgr
+
+cmip5: &CMIP5
+  project: CMIP5
+  ensemble: r1i1p1
+
+datasets:
+  # cmip5
+  - {<<: *CMIP5, dataset: ACCESS1-0}
+  - {<<: *CMIP5, dataset: CESM1-BGC}
+  - {<<: *CMIP5, dataset: GFDL-ESM2M}
+  - {<<: *CMIP5, dataset: MIROC-ESM}
+  - {<<: *CMIP5, dataset: MRI-CGCM3}
+  # cmip6
+  - {dataset: ACCESS-ESM1-5, institute: CSIRO}
+  - {dataset: CESM2, institute: NCAR}
+  - {dataset: GFDL-CM4, grid: gr1}
+  - {dataset: MIROC-ES2L, ensemble: r1i1p1f2}
+  - {dataset: MRI-ESM2-0}
+
+
+preprocessors:
+  default: &default  # common preprocessor settings
+    regrid:
+      target_grid: 3x3
+      scheme: linear
+    distance_metric:
+      metric: weighted_rmse
+    climate_statistics:
+      operator: mean
+      period: month
+    mask_fillvalues:
+      threshold_fraction: 0.95
+    multi_model_statistics:
+      span: overlap
+      statistics:
+        - operator: mean
+        - operator: percentile
+          percent: 50
+      groupby: ['project']
+      # exclude all possible reference datasets
+      exclude: [
+        AIRS-2-1,
+        CERES-EBAF,
+        ERA-Interim,
+        ESACCI-AEROSOL,
+        ESACCI-CLOUD,
+        ESACCI-OZONE,
+        ESACCI-SOILMOISTURE,
+        ESACCI-SST,
+        GPCP-SG,
+        HadISST,
+        NCEP-NCAR-R1,
+        MODIS,
+        NIWA-BS,
+        PATMOS-x]
+  pp200:  # only add/overwrite var specific settings
+    <<: *default
+    extract_levels:
+      levels: 20000
+      scheme: linear
+      coordinate: air_pressure
+  pp500:  # only add/overwrite var specific settings
+    <<: *default
+    extract_levels:
+      levels: 50000
+      scheme: linear
+      coordinate: air_pressure
+  thr10:  # only add/overwrite var specific settings
+    <<: *default
+    mask_fillvalues:
+      threshold_fraction: 0.10
+
+var_default: &var_default
+  mip: Amon
+  project: CMIP6
+  exp: historical
+  ensemble: r1i1p1f1
+  preprocessor: default
+  grid: gn
+  start_year: 2000
+  end_year: 2002
+  split: Ref1  # first triangle
+
+diagnostics:
+  portrait_rmse:
+    themes: [aerosols, phys, clouds, atmDyn, chem, ghg]
+    realms: [atmos, land, atmosChem, ocean]
+    variables:
+      tas: &tas
+        <<: *var_default
+        short_name: tas
+        variable: tas
+        additional_datasets:
+          - {dataset: ERA-Interim, project: OBS6, type: reanaly,
+             version: 1, tier: 3, reference_for_metric: true}
+      tas_2:
+        <<: *tas
+        split: Ref2
+        additional_datasets:
+          - {dataset: NCEP-NCAR-R1, project: OBS6, type: reanaly,
+             version: 1, tier: 2, reference_for_metric: true}
+      pr:
+        <<: *var_default
+        variable: pr
+        split: Ref1
+        additional_datasets:
+          - {dataset: GPCP-SG, project: OBS, type: atmos,
+             version: 2.3, tier: 2, reference_for_metric: true}
+      rsut:
+        <<: *var_default
+        variable: rsut
+        start_year: 2001
+        end_year: 2003
+        additional_datasets:
+          - {dataset: CERES-EBAF, project: OBS, type: sat, version: Ed4.2,
+             tier: 2, reference_for_metric: true}
+      ua200: &ua200
+        <<: *var_default
+        variable: ua200
+        short_name: ua
+        preprocessor: pp200
+        split: Ref1
+        additional_datasets:
+          - {dataset: ERA-Interim, project: OBS6, type: reanaly,
+             version: 1, tier: 3, reference_for_metric: true}
+      ua200_2:
+        <<: *ua200
+        split: Ref2
+        additional_datasets:
+          - {dataset: NCEP-NCAR-R1, project: OBS6, type: reanaly,
+             version: 1, tier: 2, reference_for_metric: true}
+
+    scripts:
+      portrait:
+        script: portrait_plot.py
+        x_by: dataset
+        y_by: variable  # extra_facet
+        group_by: project
+        normalize: "centered_median"
+        default_split: Ref1
+        nan_color: null
+        plot_kwargs:
+          vmin: -0.5
+          vmax: +0.5
+        cbar_kwargs:
+          label: Relative RMSE
+          extend: both